openmpi/ompi/mca/btl/openib/btl_openib.h

/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2009 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006-2011 Cisco Systems, Inc.  All rights reserved.
 * Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2006-2007 Los Alamos National Security, LLC.  All rights
 *                         reserved.
 * Copyright (c) 2006-2007 Voltaire All rights reserved.
 * Copyright (c) 2009-2010 Oracle and/or its affiliates.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 *
 * @file
 */

#ifndef MCA_BTL_IB_H
#define MCA_BTL_IB_H

#include "ompi_config.h"
#include <sys/types.h>
#include <string.h>
#include <infiniband/verbs.h>

/* Open MPI includes */
#include "ompi/class/ompi_free_list.h"
#include "opal/class/opal_pointer_array.h"
#include "opal/class/opal_hash_table.h"
#include "opal/util/output.h"
#include "opal/mca/event/event.h"
#include "opal/threads/threads.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/mpool/mpool.h"
#include "ompi/mca/btl/base/btl_base_error.h"

#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h"

#include "connect/connect.h"

BEGIN_C_DECLS

/* Evaluates to 1 when OMPI_HAVE_CONNECTX_XRC is 1, otherwise to 0.
   Used below in "#if HAVE_XRC" to compile in the XRC-specific fields. */
#define HAVE_XRC (1 == OMPI_HAVE_CONNECTX_XRC)

#define MCA_BTL_IB_LEAVE_PINNED 1
/* NOTE(review): presumably the default (link-local) subnet prefix found in
   the upper 64 bits of a GID - confirm against the users of this macro. */
#define IB_DEFAULT_GID_PREFIX 0xfe80000000000000ll
/* Keeps the low 15 bits of a value.
   NOTE(review): presumably strips the top (membership) bit of a partition
   key - confirm. */
#define MCA_BTL_IB_PKEY_MASK 0x7fff


/*--------------------------------------------------------------------*/

#if OPAL_ENABLE_DEBUG
/**
 * Debugging aid (debug builds only): print a notice and then spin in
 * 5-second sleeps until a debugger attached to the process sets the
 * local variable "i" to a non-zero value.
 *
 * The flag is volatile so that the compiler must re-read it on every
 * iteration; otherwise the loop may legally be compiled into an
 * unconditional infinite loop that a debugger write cannot end.
 *
 * The expansion deliberately does NOT end in a semicolon: the caller
 * writes "ATTACH();", which keeps the macro usable as the body of an
 * if/else without braces.
 *
 * In non-debug builds the macro expands to nothing.
 */
#define ATTACH() do { \
  volatile int i = 0; \
  opal_output(0, "WAITING TO DEBUG ATTACH"); \
  while (i == 0) sleep(5); \
  } while(0)
#else
#define ATTACH()
#endif

/*--------------------------------------------------------------------*/

/**
 * Infiniband (IB) BTL component.
 */

/**
 * Transport type of an openib BTL (see mca_btl_openib_get_transport_type()
 * and btl_openib_get_transport_name()).
 *
 * MCA_BTL_OPENIB_TRANSPORT_SIZE is not a transport: being the last
 * enumerator, its value equals the number of enumerators preceding it.
 */
typedef enum {
    MCA_BTL_OPENIB_TRANSPORT_IB,
    MCA_BTL_OPENIB_TRANSPORT_IWARP,
    MCA_BTL_OPENIB_TRANSPORT_RDMAOE,
    MCA_BTL_OPENIB_TRANSPORT_UNKNOWN,
    MCA_BTL_OPENIB_TRANSPORT_SIZE
} mca_btl_openib_transport_type_t;

/**
 * Kind of queue pair (QP) described by a mca_btl_openib_qp_info_t entry.
 * SRQ is a QP that uses a Shared Receive Queue.
 * NOTE(review): PP presumably means "per-peer" and XRC the ConnectX XRC
 * extension (see HAVE_XRC) - confirm.
 */
typedef enum {
    MCA_BTL_OPENIB_PP_QP,
    MCA_BTL_OPENIB_SRQ_QP,
    MCA_BTL_OPENIB_XRC_QP
} mca_btl_openib_qp_type_t;

/* Settings stored in the "pp_qp" member of the union inside
   mca_btl_openib_qp_info_t.
   NOTE(review): the meaning of the two fields is not visible in this
   header; presumably rd_win is a receive credit window and rd_rsv a
   number of reserved receive buffers - confirm against the parser of
   btl_openib_receive_queues. */
struct mca_btl_openib_pp_qp_info_t {
    int32_t rd_win;
    int32_t rd_rsv;
}; typedef struct mca_btl_openib_pp_qp_info_t mca_btl_openib_pp_qp_info_t;

/* Settings stored in the "srq_qp" member of the union inside
   mca_btl_openib_qp_info_t. */
struct mca_btl_openib_srq_qp_info_t {
    /* NOTE(review): presumably the maximum number of outstanding sends
       (initial value of sd_credits) - confirm */
    int32_t sd_max;
    /* The initial value for the rd_curr_num variables of all SRQs */
    int32_t rd_init;
    /* The watermark (threshold): if the number of WQEs in the SRQ drops
       below this value, the SRQ limit event (IBV_EVENT_SRQ_LIMIT_REACHED)
       is generated on the corresponding SRQ.  As a result, the maximum
       number of pre-posted WQEs on the SRQ will be increased. */
    int32_t srq_limit;
}; typedef struct mca_btl_openib_srq_qp_info_t mca_btl_openib_srq_qp_info_t;

/* Description of one QP; the component keeps an array of these in
   mca_btl_openib_component.qp_infos, indexed by QP number. */
struct mca_btl_openib_qp_info_t {
    /* Kind of QP (tested by the BTL_OPENIB_QP_TYPE_* macros) */
    mca_btl_openib_qp_type_t type;
    /* Compared against the eager limit in qp_cq_prio() to choose the CQ.
       NOTE(review): presumably the buffer size served by this QP, in
       bytes - confirm */
    size_t size;
    /* NOTE(review): presumably the number of receive buffers and the low
       watermark that triggers re-posting - confirm */
    int32_t rd_num;
    int32_t rd_low;
    /* Type-specific settings.
       NOTE(review): presumably "type" selects the valid member - confirm */
    union {
        mca_btl_openib_pp_qp_info_t pp_qp;
        mca_btl_openib_srq_qp_info_t srq_qp;
    } u;
}; typedef struct mca_btl_openib_qp_info_t mca_btl_openib_qp_info_t;

/* Type of QP number Q, read from mca_btl_openib_component.qp_infos.
   Q is used as an array index and is not range-checked. */
#define BTL_OPENIB_QP_TYPE(Q) (mca_btl_openib_component.qp_infos[(Q)].type)
/* Non-zero if QP number Q is of type MCA_BTL_OPENIB_PP_QP */
#define BTL_OPENIB_QP_TYPE_PP(Q) \
    (BTL_OPENIB_QP_TYPE(Q) == MCA_BTL_OPENIB_PP_QP)
/* Non-zero if QP number Q is of type MCA_BTL_OPENIB_SRQ_QP */
#define BTL_OPENIB_QP_TYPE_SRQ(Q) \
    (BTL_OPENIB_QP_TYPE(Q) == MCA_BTL_OPENIB_SRQ_QP)
/* Non-zero if QP number Q is of type MCA_BTL_OPENIB_XRC_QP */
#define BTL_OPENIB_QP_TYPE_XRC(Q) \
    (BTL_OPENIB_QP_TYPE(Q) == MCA_BTL_OPENIB_XRC_QP)

/* Where the value of btl_openib_receive_queues came from; stored in
   mca_btl_openib_component.receive_queues_source.
   BTL_OPENIB_RQ_SOURCE_MAX is the last enumerator, so its value equals
   the number of sources listed before it. */
typedef enum {
    BTL_OPENIB_RQ_SOURCE_DEFAULT,
    BTL_OPENIB_RQ_SOURCE_MCA,
    BTL_OPENIB_RQ_SOURCE_DEVICE_INI,
    BTL_OPENIB_RQ_SOURCE_MAX
} btl_openib_receive_queues_source_t;

/* Device-type selector stored in mca_btl_openib_component.device_type.
   NOTE(review): presumably restricts which kinds of devices the component
   uses (IB only, iWARP only, or all) - confirm. */
typedef enum {
    BTL_OPENIB_DT_IB,
    BTL_OPENIB_DT_IWARP,
    BTL_OPENIB_DT_ALL
} btl_openib_device_type_t;

#if OPAL_HAVE_THREADS
/* The structure used to manage all BTL SRQs (only present when
   OPAL_HAVE_THREADS is set) */
typedef struct mca_btl_openib_srq_manager_t {
    /* NOTE(review): presumably protects srq_addr_table - confirm */
    opal_mutex_t lock;
    /* The keys of this hash table are the addresses of the SRQ
       structures, and the values are pointers to the BTL modules
       associated with these SRQs */
    opal_hash_table_t srq_addr_table;
} mca_btl_openib_srq_manager_t;
#endif

/**
 * The openib BTL component: the base BTL component followed by the
 * configuration values and the state shared by all openib BTL modules.
 * A single instance is declared below as mca_btl_openib_component.
 */
struct mca_btl_openib_component_t {
    mca_btl_base_component_2_0_0_t          super;  /**< base BTL component */

    int                                ib_max_btls;
    /**< maximum number of devices available to openib component */

    int                                ib_num_btls;
    /**< number of devices available to the openib component */

    struct mca_btl_openib_module_t             **openib_btls;
    /**< array of available BTLs */

    opal_pointer_array_t devices; /**< array of available devices */
    int devices_count;

    int ib_free_list_num;
    /**< initial size of free lists */

    int ib_free_list_max;
    /**< maximum size of free lists */

    int ib_free_list_inc;
    /**< number of elements to alloc when growing free lists */

    opal_list_t                             ib_procs;
    /**< list of ib proc structures */

    opal_event_t                            ib_send_event;
    /**< event structure for sends */

    opal_event_t                            ib_recv_event;
    /**< event structure for recvs */

    opal_mutex_t                            ib_lock;
    /**< lock for accessing module state */

    char* ib_mpool_name;
    /**< name of ib memory pool */

    uint8_t num_pp_qps;          /**< number of pp qp's */
    uint8_t num_srq_qps;         /**< number of srq qp's */
    uint8_t num_xrc_qps;         /**< number of xrc qp's */
    uint8_t num_qps;             /**< total number of qp's */

    opal_hash_table_t ib_addr_table; /**< used only for XRC: hash table that
                                       keeps a table of all lids/subnets */
    mca_btl_openib_qp_info_t* qp_infos; /**< per-QP settings, indexed by QP number */

    size_t eager_limit;      /**< Eager send limit of first fragment, in Bytes */
    size_t max_send_size;    /**< Maximum send size, in Bytes */
    uint32_t max_hw_msg_size;/**< Maximum message size for RDMA protocols in Bytes */
    uint32_t reg_mru_len;    /**< Length of the registration cache most recently used list */
    uint32_t use_srq;        /**< Use the Shared Receive Queue (SRQ mode) */

    uint32_t ib_cq_size[2];  /**< Max outstanding CQE on the CQ */

    int32_t ib_max_inline_data; /**< Max size of inline data */
    uint32_t ib_pkey_val;
    uint32_t ib_psn;
    uint32_t ib_qp_ous_rd_atom;
    uint32_t ib_mtu;
    uint32_t ib_min_rnr_timer;
    uint32_t ib_timeout;
    uint32_t ib_retry_count;
    uint32_t ib_rnr_retry;
    uint32_t ib_max_rdma_dst_ops;
    uint32_t ib_service_level;
    uint32_t ib_path_rec_service_level;
    int32_t use_eager_rdma;
    int32_t eager_rdma_threshold; /**< After this number of msg, use RDMA for short messages, always */
    int32_t eager_rdma_num;
    int32_t max_eager_rdma;
    uint32_t btls_per_lid;
    uint32_t max_lmc;
    int32_t apm_lmc;
    int32_t apm_ports;
    uint32_t buffer_alignment;    /**< Preferred communication buffer alignment in Bytes (must be power of two) */
#if OPAL_HAVE_THREADS
    int32_t error_counter;           /**< Counts the number of error events that we got on all devices */
    int async_pipe[2];               /**< Pipe for communication with the async event thread */
    int async_comp_pipe[2];          /**< Pipe for async thread communication with the main thread */
    pthread_t   async_thread;        /**< Async thread that will handle fatal errors */
    uint32_t use_async_event_thread; /**< Use the async event handler */
    mca_btl_openib_srq_manager_t srq_manager;     /**< Hash table for all BTL SRQs */
#if BTL_OPENIB_FAILOVER_ENABLED
    uint32_t port_error_failover;    /**< Report port errors to speed up failover */
#endif
#endif
    btl_openib_device_type_t device_type;
    char *if_include;
    char **if_include_list;
    char *if_exclude;
    char **if_exclude_list;
    char *ipaddr_include;
    char *ipaddr_exclude;

    /* MCA param btl_openib_receive_queues */
    char *receive_queues;
    /* Whether we got a non-default value of btl_openib_receive_queues */
    btl_openib_receive_queues_source_t receive_queues_source;

    /** Colon-delimited list of filenames for device parameters */
    char *device_params_file_names;

    /** Whether we're in verbose mode or not */
    bool verbose;

    /** Whether we want a warning if no device-specific parameters are
        found in INI files */
    bool warn_no_device_params_found;
    /** Whether we want a warning if a non-default GID prefix is not
        configured on a multiport setup */
    bool warn_default_gid_prefix;
    /** Whether we want a warning if the user specifies a non-existent
        device and/or port via btl_openib_if_[in|ex]clude MCA params */
    bool warn_nonexistent_if;
    /** Dummy argv-style list; a copy of names from the
        if_[in|ex]clude list that we use for error checking (to ensure
        that they all exist) */
    char **if_list;
    bool use_message_coalescing;
    uint32_t cq_poll_ratio;
    uint32_t cq_poll_progress;
    uint32_t eager_rdma_poll_ratio;
#ifdef HAVE_IBV_FORK_INIT
    /** Whether we want fork support or not */
    int want_fork_support;
#endif
    int rdma_qp; /* QP number tested by BTL_OPENIB_RDMA_QP() */
    int credits_qp; /* qp used for software flow control */
    bool cpc_explicitly_defined;
    /** free list of frags only; used for pinning user memory */
    ompi_free_list_t send_user_free;
    /** free list of frags only; used for pinning user memory */
    ompi_free_list_t recv_user_free;
    /** frags for coalesced messages */
    ompi_free_list_t send_free_coalesced;
    /** Default receive queues */
    char* default_recv_qps;
    /** GID index to use */
    int gid_index;
    /** Whether we want a dynamically resizing srq, enabled by default */
    bool enable_srq_resize;
#if BTL_OPENIB_FAILOVER_ENABLED
    int verbose_failover;
#endif
}; typedef struct mca_btl_openib_component_t mca_btl_openib_component_t;

/* Declaration of the global openib component object; the definition
   lives in another translation unit. */
OMPI_MODULE_DECLSPEC extern mca_btl_openib_component_t mca_btl_openib_component;

/* openib name for the base BTL receive-registration type */
typedef mca_btl_base_recv_reg_t mca_btl_openib_recv_reg_t;

/**
 * Common information for all ports that is sent in the modex message.
 *
 * The field order is part of the message format: do not reorder fields
 * or insert new ones after "end".  "end" carries no data; it only marks
 * where the real payload stops.
 * NOTE(review): presumably the sender uses the offset of "end" as the
 * message length so that trailing struct padding is not sent - confirm.
 */
typedef struct mca_btl_openib_modex_message_t {
    /** The subnet ID of this port */
    uint64_t subnet_id;
    /** LID of this port */
    uint16_t lid;
    /** APM LID for this port */
    uint16_t apm_lid;
    /** The MTU used by this port */
    uint8_t mtu;
    /** Vendor id; defines device type and tuning */
    uint32_t vendor_id;
    /** Vendor part id; defines device type and tuning */
    uint32_t vendor_part_id;
    /** Transport type of remote port */
    uint8_t transport_type;
    /** Dummy field used to calculate the real length */
    uint8_t end;
} mca_btl_openib_modex_message_t;

/* Convert, in place, the subnet_id (64 bit) and lid (16 bit) fields of a
   mca_btl_openib_modex_message_t from network to host byte order.
   "hdr" is the structure itself (not a pointer) and is evaluated more
   than once, so it must not have side effects.
   NOTE(review): apm_lid, vendor_id and vendor_part_id are multi-byte
   fields too but are not converted here - confirm whether that is
   intentional before relying on them between hosts of different
   endianness. */
#define MCA_BTL_OPENIB_MODEX_MSG_NTOH(hdr)     \
    do {                              \
        (hdr).subnet_id = ntoh64((hdr).subnet_id); \
        (hdr).lid = ntohs((hdr).lid); \
    } while (0)
/* Inverse of MCA_BTL_OPENIB_MODEX_MSG_NTOH: convert subnet_id and lid
   from host to network byte order, in place.  The same caveats apply. */
#define MCA_BTL_OPENIB_MODEX_MSG_HTON(hdr)     \
    do {                              \
        (hdr).subnet_id = hton64((hdr).subnet_id); \
        (hdr).lid = htons((hdr).lid); \
    } while (0)

/* Per-QP free lists kept by a device (see mca_btl_openib_device_t.qps) */
typedef struct mca_btl_openib_device_qp_t {
    ompi_free_list_t send_free;     /**< free lists of send buffer descriptors */
    ompi_free_list_t recv_free;     /**< free lists of receive buffer descriptors */
} mca_btl_openib_device_qp_t;

/* Forward declaration: only pointers to endpoints are used in this header */
struct mca_btl_base_endpoint_t;

/**
 * State kept for one ib device.  It is an OPAL object (the first member
 * is opal_object_t and the class is declared right after the struct).
 */
typedef struct mca_btl_openib_device_t {
    opal_object_t super;
    struct ibv_device *ib_dev;  /* the ib device */
#if OMPI_ENABLE_PROGRESS_THREADS == 1
    struct ibv_comp_channel *ib_channel; /* Channel event for the device */
    opal_thread_t thread;                /* Progress thread */
    volatile bool progress;              /* Progress status */
#endif
    opal_mutex_t device_lock;          /* device level lock */
    struct ibv_context *ib_dev_context;
    struct ibv_device_attr ib_dev_attr;
    struct ibv_pd *ib_pd;
    /* NOTE(review): presumably indexed by BTL_OPENIB_HP_CQ / BTL_OPENIB_LP_CQ,
       as is cq_size - confirm */
    struct ibv_cq *ib_cq[2];
    uint32_t cq_size[2];
    mca_mpool_base_module_t *mpool;
    /* MTU for this device */
    uint32_t mtu;
    /* Whether this device supports eager RDMA */
    uint8_t use_eager_rdma;
    uint8_t btls;              /**< number of btls using this device */
    opal_pointer_array_t *endpoints;
    opal_pointer_array_t *device_btls;
    uint16_t hp_cq_polls;
    uint16_t eager_rdma_polls;
    bool pollme;
#if OPAL_HAVE_THREADS
    volatile bool got_fatal_event;
    volatile bool got_port_event;
#endif
#if HAVE_XRC
    struct ibv_xrc_domain *xrc_domain;
    int xrc_fd;
#endif
    int32_t non_eager_rdma_endpoints;
    int32_t eager_rdma_buffers_count;
    struct mca_btl_base_endpoint_t **eager_rdma_buffers;
    /** frags for control messages */
    ompi_free_list_t send_free_control;
    /* QP types and attributes that will be used on this device */
    mca_btl_openib_device_qp_t *qps;
    /* Maximum value supported by this device for max_inline_data */
    uint32_t max_inline_data;
} mca_btl_openib_device_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_device_t);

/* Per-module state of a PP QP.  It holds only a placeholder member, so
   that the type can be used in the union of mca_btl_openib_module_qp_t. */
struct mca_btl_openib_module_pp_qp_t {
    int32_t dummy;
}; typedef struct mca_btl_openib_module_pp_qp_t mca_btl_openib_module_pp_qp_t;

/* Per-module state of an SRQ QP */
struct mca_btl_openib_module_srq_qp_t {
    struct ibv_srq *srq;
    int32_t rd_posted;
    int32_t sd_credits;  /* the max number of outstanding sends on a QP when using SRQ */
                         /*  i.e. the number of frags that can be outstanding (down counter) */
    opal_list_t pending_frags[2];    /**< list of high/low prio frags */
    /** The number of receive buffers that can currently be posted.
        The value may be increased in the IBV_EVENT_SRQ_LIMIT_REACHED
        event handler. The value starts from (rd_num / 4) and is increased
        up to rd_num */
    int32_t rd_curr_num;
    /** We post additional WQEs only if the number of WQEs (in this specific
         SRQ) is less than this value. The value is increased together with
         rd_curr_num. The value is unique for every SRQ. */
    int32_t rd_low_local;
    /** The flag says whether we want to get the
         IBV_EVENT_SRQ_LIMIT_REACHED events for dynamically resizing SRQ */
    bool srq_limit_event_flag;
    /**< Unlike the "--mca enable_srq_resize" parameter, which says whether
         we want to start with a small number of pre-posted receive buffers
         (rd_curr_num) and grow that number on demand (up to rd_num, the whole
         size of the SRQ), "srq_limit_event_flag" says whether we want the
         device to raise a limit event when the configured SRQ limit is
         reached (a signal to the main thread). The flag is cleared once
         rd_curr_num has been increased up to rd_num.
         In order to avoid lock/unlock operations in the critical path, the
         asynchronous thread only sets the flag: receive buffers are posted
         in the main thread only, and only after posting do we set (if
         srq_limit_event_flag is true) the limit for the
         IBV_EVENT_SRQ_LIMIT_REACHED event. */
}; typedef struct mca_btl_openib_module_srq_qp_t mca_btl_openib_module_srq_qp_t;

/* Per-module state of one QP (see mca_btl_openib_module_t.qps).
   NOTE(review): presumably the valid union member is chosen by the QP's
   type, see BTL_OPENIB_QP_TYPE() - confirm. */
struct mca_btl_openib_module_qp_t {
    union {
        mca_btl_openib_module_pp_qp_t pp_qp;
        mca_btl_openib_module_srq_qp_t srq_qp;
    } u;
}; typedef struct mca_btl_openib_module_qp_t mca_btl_openib_module_qp_t;

/**
 * IB BTL Interface
 */
struct mca_btl_openib_module_t {
    /* Base BTL module; it must stay the first member so that a pointer to
       this structure can be used where a mca_btl_base_module_t pointer
       is expected */
    mca_btl_base_module_t  super;

    /* NOTE(review): presumably set once the module has been fully
       initialized - confirm */
    bool btl_inited;

    /** Common information about all ports */
    mca_btl_openib_modex_message_t port_info;

    /** Array of CPCs on this port */
    ompi_btl_openib_connect_base_module_t **cpcs;

    /** Number of elements in the cpcs array */
    uint8_t num_cpcs;

    mca_btl_openib_device_t *device;   /**< device this module belongs to */
    uint8_t port_num;                  /**< ID of the PORT */
    uint16_t pkey_index;
    struct ibv_port_attr ib_port_attr;
    uint16_t lid;                      /**< lid that is actually used (for LMC) */
    int apm_port;                      /**< Alternative port that may be used for APM */
    uint8_t src_path_bits;             /**< offset from base lid (for LMC) */

    int32_t num_peers;

    opal_mutex_t ib_lock;              /**< module level lock */

    size_t eager_rdma_frag_size;                /**< length of eager frag */
    volatile int32_t eager_rdma_channels;  /**< number of open RDMA channels */

    mca_btl_base_module_error_cb_fn_t error_cb; /**< error handler */

    mca_btl_openib_module_qp_t * qps;  /**< per-QP module state */
};
typedef struct mca_btl_openib_module_t mca_btl_openib_module_t;

/* Declaration of a global openib module object defined in another
   translation unit.
   NOTE(review): presumably the template copied for every new module -
   confirm. */
extern mca_btl_openib_module_t mca_btl_openib_module;

/* An mpool registration extended with the verbs memory region (ibv_mr)
   that belongs to it.  "base" must stay the first member so that a
   pointer to this structure can be used as a
   mca_mpool_base_registration_t pointer. */
struct mca_btl_openib_reg_t {
    mca_mpool_base_registration_t base;
    struct ibv_mr *mr;
};
typedef struct mca_btl_openib_reg_t mca_btl_openib_reg_t;

#if OMPI_ENABLE_PROGRESS_THREADS == 1
/* Entry point of the progress thread; only declared when progress
   threads are enabled (see the "thread" member of
   mca_btl_openib_device_t). */
extern void* mca_btl_openib_progress_thread(opal_object_t*);
#endif


/**
 * Register a callback function that is called on error.
 *
 * @param btl (IN)     BTL module
 * @param cbfunc (IN)  Error callback to register
 * @return             Status indicating whether the registration was
 *                     successful
 */

int mca_btl_openib_register_error_cb(
    struct mca_btl_base_module_t* btl,
    mca_btl_base_module_error_cb_fn_t cbfunc
);


/**
 * Cleanup any resources held by the BTL.
 *
 * @param btl (IN)  BTL instance.
 * @return          OMPI_SUCCESS or error status on failure.
 */

extern int mca_btl_openib_finalize(
    struct mca_btl_base_module_t* btl
);


/**
 * PML->BTL notification of change in the process list (processes added).
 *
 * @param btl (IN)            BTL module
 * @param nprocs (IN)         Number of processes
 * @param procs (IN)          Set of processes
 * @param peers (OUT)         Set of (optional) peer addressing info.
 * @param reachable (IN/OUT)  Set of processes that are reachable via this BTL.
 * @return     OMPI_SUCCESS or error status on failure.
 *
 */

extern int mca_btl_openib_add_procs(
    struct mca_btl_base_module_t* btl,
    size_t nprocs,
    struct ompi_proc_t **procs,
    struct mca_btl_base_endpoint_t** peers,
    opal_bitmap_t* reachable
);

/**
 * PML->BTL notification of change in the process list (processes removed).
 *
 * @param btl (IN)     BTL instance
 * @param nprocs (IN)  Number of processes.
 * @param procs (IN)   Set of processes.
 * @param peers (IN)   Set of peer data structures.
 * @return             Status indicating if cleanup was successful
 *
 */
extern int mca_btl_openib_del_procs(
    struct mca_btl_base_module_t* btl,
    size_t nprocs,
    struct ompi_proc_t **procs,
    struct mca_btl_base_endpoint_t** peers
);


/**
 * PML->BTL Initiate a send of the specified size.
 *
 * @param btl (IN)               BTL instance
 * @param btl_peer (IN)          BTL peer addressing
 * @param descriptor (IN)        Descriptor of data to be transmitted.
 * @param tag (IN)               Tag.
 * @return                       Status code; the possible values are not
 *                               visible in this header.
 */
extern int mca_btl_openib_send(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* btl_peer,
    struct mca_btl_base_descriptor_t* descriptor,
    mca_btl_base_tag_t tag
);

/**
 * PML->BTL Initiate an immediate send of the specified size.
 *
 * @param btl (IN)               BTL instance
 * @param ep (IN)                Endpoint
 * @param convertor (IN)         Datatype convertor
 * @param header (IN)            PML header
 * @param header_size (IN)       PML header size
 * @param payload_size (IN)      Payload size
 * @param order (IN)             Order
 * @param flags (IN)             Flags
 * @param tag (IN)               Tag
 * @param descriptor (OUT)       Message descriptor
 * @return                       Status code; the possible values are not
 *                               visible in this header.
 */
extern int mca_btl_openib_sendi( struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* ep,
    struct opal_convertor_t* convertor,
    void* header,
    size_t header_size,
    size_t payload_size,
    uint8_t order,
    uint32_t flags,
    mca_btl_base_tag_t tag,
    mca_btl_base_descriptor_t** descriptor
); 

/**
 * PML->BTL Initiate a put of the specified size.
 *
 * @param btl (IN)               BTL instance
 * @param btl_peer (IN)          BTL peer addressing
 * @param descriptor (IN)        Descriptor of data to be transmitted.
 * @return                       Status code; the possible values are not
 *                               visible in this header.
 */
extern int mca_btl_openib_put(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* btl_peer,
    struct mca_btl_base_descriptor_t* descriptor
    );

/**
 * PML->BTL Initiate a get of the specified size.
 *
 * @param btl (IN)               BTL instance
 * @param btl_peer (IN)          BTL peer addressing
 * @param descriptor (IN)        Descriptor of data to be transmitted.
 * @return                       Status code; the possible values are not
 *                               visible in this header.
 */
extern int mca_btl_openib_get(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* btl_peer,
    struct mca_btl_base_descriptor_t* descriptor
    );


/**
 * Allocate a descriptor.
 *
 * @param btl (IN)       BTL module
 * @param endpoint (IN)  BTL peer addressing
 * @param order (IN)     Order
 * @param size (IN)      Requested descriptor size.
 * @param flags (IN)     Flags
 * @return               The allocated descriptor.
 *                       NOTE(review): presumably NULL on failure - confirm.
 */
extern mca_btl_base_descriptor_t* mca_btl_openib_alloc(
        struct mca_btl_base_module_t* btl,
        struct mca_btl_base_endpoint_t* endpoint,
        uint8_t order,
        size_t size,
        uint32_t flags);


/**
 * Return a segment allocated by this BTL.
 *
 * @param btl (IN)  BTL module
 * @param des (IN)  Allocated descriptor.
 * @return          Status code; the possible values are not visible in
 *                  this header.
 */
extern int mca_btl_openib_free(
                               struct mca_btl_base_module_t* btl,
                               mca_btl_base_descriptor_t* des);


/**
 * Pack data and return a descriptor that can be
 * used for send/put.
 *
 * @param btl (IN)           BTL module
 * @param peer (IN)          BTL peer addressing
 * @param registration (IN)  Memory registration.
 *                           NOTE(review): presumably optional - confirm.
 * @param convertor (IN)     Datatype convertor describing the data
 * @param order (IN)         Order
 * @param reserve (IN)       NOTE(review): presumably the number of bytes to
 *                           reserve in front of the data for a header - confirm.
 * @param size (IN/OUT)      Passed by pointer.
 *                           NOTE(review): presumably the requested size on
 *                           input and the prepared size on output - confirm.
 * @param flags (IN)         Flags
 * @return                   The prepared descriptor.
 *                           NOTE(review): presumably NULL on failure - confirm.
 */
mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
                                                      struct mca_btl_base_module_t* btl,
                                                      struct mca_btl_base_endpoint_t* peer,
                                                      mca_mpool_base_registration_t* registration,
                                                      struct opal_convertor_t* convertor,
                                                      uint8_t order,
                                                      size_t reserve,
                                                      size_t* size,
                                                      uint32_t flags
                                                      );

/**
 * Allocate a descriptor initialized for RDMA write.
 *
 * @param btl (IN)           BTL module
 * @param peer (IN)          BTL peer addressing
 * @param registration (IN)  Memory registration.
 *                           NOTE(review): presumably optional - confirm.
 * @param convertor (IN)     Datatype convertor describing the destination
 * @param order (IN)         Order
 * @param reserve (IN)       NOTE(review): presumably the number of bytes to
 *                           reserve for a header - confirm.
 * @param size (IN/OUT)      Passed by pointer.
 *                           NOTE(review): presumably the requested size on
 *                           input and the prepared size on output - confirm.
 * @param flags (IN)         Flags
 * @return                   The prepared descriptor.
 *                           NOTE(review): presumably NULL on failure - confirm.
 */
extern mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
                                                             struct mca_btl_base_module_t* btl,
                                                             struct mca_btl_base_endpoint_t* peer,
                                                             mca_mpool_base_registration_t* registration,
                                                             struct opal_convertor_t* convertor,
                                                             uint8_t order,
                                                             size_t reserve,
                                                             size_t* size,
                                                             uint32_t flags);

/**
 * NOTE(review): judging by the name, progresses the put/get fragments that
 * are pending on an endpoint - confirm against the implementation.
 *
 * The first argument is the endpoint.  The parameters are unnamed in this
 * declaration; NOTE(review): the second one is presumably a QP number -
 * confirm.
 */
extern void mca_btl_openib_frag_progress_pending_put_get(
        struct mca_btl_base_endpoint_t*, const int);

/**
 * Fault Tolerance Event Notification Function
 *
 * @param state (IN)  Checkpoint state
 * @return            OMPI_SUCCESS or failure status
 */
extern int mca_btl_openib_ft_event(int state);


/**
 * Show an error during init, particularly when running out of
 * registered memory.
 *
 * @param file (IN)  Source file name to report.
 *                   NOTE(review): presumably __FILE__ of the caller - confirm.
 * @param line (IN)  Source line number to report
 * @param func (IN)  Name of the function to report
 * @param dev (IN)   Name of the device to report
 */
void mca_btl_openib_show_init_error(const char *file, int line,
                                    const char *func, const char *dev);

/* Completion queue priorities as returned by qp_cq_prio():
   HP = high priority, LP = low priority. */
#define BTL_OPENIB_HP_CQ 0
#define BTL_OPENIB_LP_CQ 1


/**
 * Post receive buffers to the Shared Receive Queue of a QP.
 *
 * @param openib_btl (IN) BTL module
 * @param qp (IN)         QP number.
 *                        NOTE(review): presumably an index into
 *                        openib_btl->qps and the component's qp_infos - confirm.
 * @return OMPI_SUCCESS or failure status
 */

int mca_btl_openib_post_srr(mca_btl_openib_module_t* openib_btl, const int qp);

/**
 * Get the transport name of a BTL by its transport type.
 *
 * @param transport_type (IN)  One of the mca_btl_openib_transport_type_t values
 * @return                     Name of the transport as a constant string
 */

const char* btl_openib_get_transport_name(mca_btl_openib_transport_type_t transport_type);

/**
 * Get the transport type of a BTL.
 *
 * @param openib_btl (IN)  BTL module
 * @return                 Transport type of the module
 */

mca_btl_openib_transport_type_t mca_btl_openib_get_transport_type(mca_btl_openib_module_t* openib_btl);

/**
 * Select the completion queue priority for a QP.
 *
 * QP 0 (the smallest QP) always uses the high priority CQ.  Any other QP
 * uses the high priority CQ when its size does not exceed the eager
 * limit, and the low priority CQ otherwise.
 *
 * @param qp (IN)  QP number, used as an index into
 *                 mca_btl_openib_component.qp_infos (not range-checked)
 * @return         BTL_OPENIB_HP_CQ or BTL_OPENIB_LP_CQ
 */
static inline int qp_cq_prio(const int qp)
{
    if (0 != qp) {
        const size_t qp_size = mca_btl_openib_component.qp_infos[qp].size;

        /* Only QPs larger than the eager limit are low priority */
        if (qp_size > mca_btl_openib_component.eager_limit) {
            return BTL_OPENIB_LP_CQ;
        }
    }

    return BTL_OPENIB_HP_CQ;
}

/* Non-zero if QP is the QP number stored in
   mca_btl_openib_component.rdma_qp */
#define BTL_OPENIB_RDMA_QP(QP) \
    ((QP) == mca_btl_openib_component.rdma_qp)

END_C_DECLS

#endif /* MCA_BTL_IB_H */
-												Replace the ompi_pointer_array with opal_pointer_array. The next step
(sometimes after the merge with the ORTE branch), the opal_pointer_array
will became the only pointer_array implementation (the orte_pointer_array
will be removed).

This commit was SVN r17007.

											
										
										
											2007-12-21 09:02:00 +03:00
+								/* -*- Mode: C; c-basic-offset:4 ; -*- */
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								/*
-												Merging in the jjhursey-ft-cr-stable branch (r13912 : HEAD).

This merge adds Checkpoint/Restart support to Open MPI. The initial
frameworks and components support a LAM/MPI-like implementation.

This commit follows the risk assessment presented to the Open MPI core
development group on Feb. 22, 2007.

This commit closes trac:158

More details to follow.

This commit was SVN r14051.

The following SVN revisions from the original message are invalid or
inconsistent and therefore were not cross-referenced:
  r13912

The following Trac tickets were found above:
  Ticket 158 --> https://svn.open-mpi.org/trac/ompi/ticket/158

											
										
										
											2007-03-17 02:11:45 +03:00
+								 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
-												Update the copyright notices for IU and UTK.

This commit was SVN r7999.

											
										
										
											2005-11-05 22:57:48 +03:00
+								 *                         University Research and Technology
 								 *                         Corporation.  All rights reserved.
-												Get rid of the bitmap header file.

This commit was SVN r20972.

											
										
										
											2009-04-10 20:44:37 +04:00
+								 * Copyright (c) 2004-2009 The University of Tennessee and The University
-												Update the copyright notices for IU and UTK.

This commit was SVN r7999.

											
										
										
											2005-11-05 22:57:48 +03:00
+								 *                         of Tennessee Research Foundation.  All rights
 								 *                         reserved.
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								 * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								 *                         University of Stuttgart.  All rights reserved.
 								 * Copyright (c) 2004-2005 The Regents of the University of California.
 								 *                         All rights reserved.
-												Add btl_openib_gid_index MCA param to allow selecting which GID to use
from an openfabrics port's GID table.

This commit was SVN r24456.

											
										
										
											2011-02-24 17:09:22 +03:00
+								 * Copyright (c) 2006-2011 Cisco Systems, Inc.  All rights reserved.
-												Adding send_immediate (sendi) implementation to openib btl.

This commit was SVN r20881.

											
										
										
											2009-03-25 19:53:26 +03:00
+								 * Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved.
-												update copyrights for ib_multifrag commit 

This commit was SVN r15612.

											
										
										
											2007-07-25 19:03:34 +04:00
+								 * Copyright (c) 2006-2007 Los Alamos National Security, LLC.  All rights
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								 *                         reserved.
-												Update Voltaire copyright.

This commit was SVN r16189.

											
										
										
											2007-09-24 14:11:52 +04:00
+								 * Copyright (c) 2006-2007 Voltaire All rights reserved.
-												Start setting a flag when a port error is detected on the openib BTL.
At this point, it is just cleared (and ignored) so default behavior has not changed.
However, future failover support can take advantage of this flag.
Reviewed by Pasha Shamis.

This commit was SVN r23204.

											
										
										
											2010-05-24 22:57:55 +04:00
+								 * Copyright (c) 2009-2010 Oracle and/or its affiliates.  All rights reserved.
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								 * $COPYRIGHT$
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								 *
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								 * Additional copyrights may follow
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								 *
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								 * $HEADER$
-												Bring over all the work from the /tmp/ib-hw-detect branch.  In
addition to my design and testing, it was conceptually approved by
Gil, Gleb, Pasha, Brad, and Galen.  Functionally [probably somewhat
lightly] tested by Galen.  We may still have to shake out some bugs
during the next few months, but it seems to be working for all the
cases that I can throw at it.

Here's a summary of the changes from that branch: 

* Move MCA parameter registration to a new file (btl_openib_mca.c):
   * Properly check the return status of registering MCA params
   * Check for valid values of MCA parameters
   * Make help strings better
   * Otherwise, the only default value of an MCA param that was
     changed was max_btls; it went from 4 to -1 (meaning: use all
     available)
 * Properly prototyped internal functions in _component.c
   * Made a bunch of functions static that didn't need to be public
   * Renamed to remove "mca_" prefix from static functions
   * Call new MCA param registration function
   * Call new INI file read/lookup/finalize functions
   * Updated a bunch of macros to be "BTL_" instead of "ORTE_"
   * Be a little more consistent with return values
   * Handle -1 for the max_btls MCA param
   * Fixed a free() that should have been an OBJ_RELEASE()
   * Some re-indenting
 * Added INI-file parsing
   * New flex file: btl_openib_ini.l
   * New default HCA params .ini file (probably to be expanded over
     time by other HCA vendors)
   * Added more show_help messages for parsing problems
   * Read in INI files and cache the values for later lookup
   * When component opens an HCA, lookup to see if any corresponding
     values were found in the INI files (ID'ed by the HCA vendor_id
     and vendor_part_id)
   * Added btl_openib_verbose MCA param that shows what the INI-file
     stuff does (e.g., shows which MTU your HCA ends up using)
   * Added btl_openib_hca_param_files as a colon-delimited list of INI
     files to check for values during startup (in order,
     left-to-right, just like the MCA base directory param).
   * MTU is currently the only value supported in this framework.
   * It is not a fatal error if we don't find params for the HCA in
     the INI file(s).  Instead, just print a warning.  New MCA param
     btl_openib_warn_no_hca_params_found can be used to disable
     printing the warning.
 * Add MTU to peer negotiation when making a connection
   * Exchange maximum MTU; select the lesser of the two

This commit was SVN r11182.

											
										
										
											2006-08-14 23:30:37 +04:00
+								 *
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								 * @file
 								 */
-												Bring over all the work from the /tmp/ib-hw-detect branch.  In
addition to my design and testing, it was conceptually approved by
Gil, Gleb, Pasha, Brad, and Galen.  Functionally [probably somewhat
lightly] tested by Galen.  We may still have to shake out some bugs
during the next few months, but it seems to be working for all the
cases that I can throw at it.

Here's a summary of the changes from that branch: 

* Move MCA parameter registration to a new file (btl_openib_mca.c):
   * Properly check the return status of registering MCA params
   * Check for valid values of MCA parameters
   * Make help strings better
   * Otherwise, the only default value of an MCA param that was
     changed was max_btls; it went from 4 to -1 (meaning: use all
     available)
 * Properly prototyped internal functions in _component.c
   * Made a bunch of functions static that didn't need to be public
   * Renamed to remove "mca_" prefix from static functions
   * Call new MCA param registration function
   * Call new INI file read/lookup/finalize functions
   * Updated a bunch of macros to be "BTL_" instead of "ORTE_"
   * Be a little more consistent with return values
   * Handle -1 for the max_btls MCA param
   * Fixed a free() that should have been an OBJ_RELEASE()
   * Some re-indenting
 * Added INI-file parsing
   * New flex file: btl_openib_ini.l
   * New default HCA params .ini file (probably to be expanded over
     time by other HCA vendors)
   * Added more show_help messages for parsing problems
   * Read in INI files and cache the values for later lookup
   * When component opens an HCA, lookup to see if any corresponding
     values were found in the INI files (ID'ed by the HCA vendor_id
     and vendor_part_id)
   * Added btl_openib_verbose MCA param that shows what the INI-file
     stuff does (e.g., shows which MTU your HCA ends up using)
   * Added btl_openib_hca_param_files as a colon-delimited list of INI
     files to check for values during startup (in order,
     left-to-right, just like the MCA base directory param).
   * MTU is currently the only value supported in this framework.
   * It is not a fatal error if we don't find params for the HCA in
     the INI file(s).  Instead, just print a warning.  New MCA param
     btl_openib_warn_no_hca_params_found can be used to disable
     printing the warning.
 * Add MTU to peer negotiation when making a connection
   * Exchange maximum MTU; select the lesser of the two

This commit was SVN r11182.

											
										
										
											2006-08-14 23:30:37 +04:00
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
+								#ifndef MCA_BTL_IB_H
 								#define MCA_BTL_IB_H
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
-												 - An intrusive commit yet again (sorry): with the separation we
   get bitten by header depending on having already included
   the corresponding [opal|orte|ompi]_config.h header.
   When separating, things like [OPAL|ORTE|OMPI]_DECLSPEC
   are missed.

   Script to add the corresponding header in front of all following
   (taking care of possible #ifdef HAVE_...)

 - Including some minor cleanups to
   - ompi/group/group.h -- include _after_ #ifndef OMPI_GROUP_H
   - ompi/mca/btl/btl.h -- include _after_ #ifndef MCA_BTL_H
   - ompi/mca/crcp/bkmrk/crcp_bkmrk_btl.c -- still no need for
     orte/util/output.h
   - ompi/mca/pml/dr/pml_dr_recvreq.c -- no need for mpool.h
   - ompi/mca/btl/btl.h -- reorder to fit
   - ompi/mca/bml/bml.h -- reorder to fit
   - ompi/runtime/ompi_mpi_finalize.c -- reorder to fit
   - ompi/request/request.h -- additionally need ompi/constants.h

 - Tested on linux/x86-64

This commit was SVN r20720.

											
										
										
											2009-03-04 18:35:54 +03:00
+								#include "ompi_config.h"
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								#include <sys/types.h>
 								#include <string.h>
-												Support for LMC (lid mask count) and multiple QPs per port.

This commit was SVN r10536.

											
										
										
											2006-06-28 11:23:08 +04:00
+								#include <infiniband/verbs.h>
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
 								/* Open MPI includes */
-												Next step in the project split, mainly source code re-arranging

  - move files out of toplevel include/ and etc/, moving it into the
    sub-projects
  - rather than including config headers with <project>/include, 
    have them as <project>
  - require all headers to be included with a project prefix, with
    the exception of the config headers ({opal,orte,ompi}_config.h
    mpi.h, and mpif.h)

This commit was SVN r8985.

											
										
										
											2006-02-12 04:33:29 +03:00
+								#include "ompi/class/ompi_free_list.h"
-												Replace the ompi_pointer_array with opal_pointer_array. The next step
(sometimes after the merge with the ORTE branch), the opal_pointer_array
will become the only pointer_array implementation (the orte_pointer_array
will be removed).

This commit was SVN r17007.

											
										
										
											2007-12-21 09:02:00 +03:00
+								#include "opal/class/opal_pointer_array.h"
-												 - For the upcoming header cleanup commit,
   several header files (previously included by header-files)
   now have to be moved "upward".
   This is mainly system headers such as string.h, stdio.h and for
   networking, but also some orte headers.

This commit was SVN r21095.

											
										
										
											2009-04-29 04:49:23 +04:00
+								#include "opal/class/opal_hash_table.h"
-												 - On the way to get the BTLs split out and lessen dependency on orte:
   Often, orte/util/show_help.h is included, although no functionality
   is required -- instead, most often opal_output.h, or               
   orte/mca/rml/rml_types.h                                           
   Please see orte_show_help_replacement.sh committed next.

 - Local compilation (Linux/x86_64) w/ -Wimplicit-function-declaration
   actually showed two *missing* #include "orte/util/show_help.h"     
   in orte/mca/odls/base/odls_base_default_fns.c and                  
   in orte/tools/orte-top/orte-top.c                                  
   Manually added these.                                              

   Let's have MTT the last word.

This commit was SVN r20557.

											
										
										
											2009-02-14 05:26:12 +03:00
+								#include "opal/util/output.h"
-Update libevent to the 2.0 series, currently at 2.0.7rc. We will update to their final release when it becomes available. Currently known errors exist in unused portions of the libevent code. This revision passes the IBM test suite on a Linux machine and on a standalone Mac.

This is a fairly intrusive change, but outside of the moving of opal/event to opal/mca/event, the only changes involved (a) changing all calls to opal_event functions to reflect the new framework instead, and (b) ensuring that all opal_event_t objects are properly constructed since they are now true opal_objects.

Note: Shiqing has just returned from vacation and has not yet had a chance to complete the Windows integration. Thus, this commit almost certainly breaks Windows support on the trunk. However, I want this to have a chance to soak for as long as possible before I become less available a week from today (going to be at a class for 5 days, and thus will only be sparingly available) so we can find and fix any problems.

Biggest change is moving the libevent code from opal/event to a new opal/mca/event framework. This was done to make it much easier to update libevent in the future. New versions can be inserted as a new component and tested in parallel with the current version until validated, then we can remove the earlier version if we so choose. This is a statically built framework ala installdirs, so only one component will build at a time. There is no selection logic - the sole compiled component simply loads its function pointers into the opal_event struct.

I have gone thru the code base and converted all the libevent calls I could find. However, I cannot compile nor test every environment. It is therefore quite likely that errors remain in the system. Please keep an eye open for two things:

1. compile-time errors: these will be obvious as calls to the old functions (e.g., opal_evtimer_new) must be replaced by the new framework APIs (e.g., opal_event.evtimer_new)

2. run-time errors: these will likely show up as segfaults due to missing constructors on opal_event_t objects. It appears that it became a typical practice for people to "init" an opal_event_t by simply using memset to zero it out. This will no longer work - you must either OBJ_NEW or OBJ_CONSTRUCT an opal_event_t. I tried to catch these cases, but may have missed some. Believe me, you'll know when you hit it.

There is also the issue of the new libevent "no recursion" behavior. As I described on a recent email, we will have to discuss this and figure out what, if anything, we need to do.

This commit was SVN r23925.

											
										
										
											2010-10-24 22:35:54 +04:00
+								#include "opal/mca/event/event.h"
-												Include the missing thread header, which is needed when build with --enable-progress-thread.

This commit was SVN r22239.

											
										
										
											2009-11-27 17:49:24 +03:00
+								#include "opal/threads/threads.h"
-												Next step in the project split, mainly source code re-arranging

  - move files out of toplevel include/ and etc/, moving it into the
    sub-projects
  - rather than including config headers with <project>/include, 
    have them as <project>
  - require all headers to be included with a project prefix, with
    the exception of the config headers ({opal,orte,ompi}_config.h
    mpi.h, and mpif.h)

This commit was SVN r8985.

											
										
										
											2006-02-12 04:33:29 +03:00
+								#include "ompi/mca/btl/btl.h"
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								#include "ompi/mca/mpool/mpool.h"
 								#include "ompi/mca/btl/base/btl_base_error.h"
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
-												Next step in the project split, mainly source code re-arranging

  - move files out of toplevel include/ and etc/, moving it into the
    sub-projects
  - rather than including config headers with <project>/include, 
    have them as <project>
  - require all headers to be included with a project prefix, with
    the exception of the config headers ({opal,orte,ompi}_config.h
    mpi.h, and mpif.h)

This commit was SVN r8985.

											
										
										
											2006-02-12 04:33:29 +03:00
+								#include "ompi/mca/btl/btl.h"
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								#include "ompi/mca/btl/base/base.h"
-												Consolidate receive buffers prepost code for HP/LP QPs.

This commit was SVN r11552.

											
										
										
											2006-09-07 17:05:41 +04:00
-												The new cpc selection framework is now in place.  The patch below allows
for dynamic selection of cpc methods based on what is available.  It
also allows for inclusion/exclusions of methods.  It even further allows
for modifying the priorities of certain cpc methods to better determine
the optimal cpc method.

This patch also contains XRC compile time disablement (per Jeff's
patch).

At a high level, the cpc selections works by walking through each cpc
and allowing it to test to see if it is permissible to run on this
mpirun.  It returns a priority if it is permissible or a -1 if not.  All
of the cpc names and priorities are rolled into a string.  This string
is then encapsulated in a message and passed around all the ompi
processes.  Once received and unpacked, the list received is compared
to a local copy of the list.  The connection method is chosen by
comparing the lists passed around to all nodes via modex with the list
generated locally.  Any non-negative number is a potentially valid
connection method.  The method below of determining the optimal
connection method is to take the cross-section of the two lists.  The
highest single value (and the other side being non-negative) is selected
as the cpc method.

svn merge -r 16948:17128 https://svn.open-mpi.org/svn/ompi/tmp-public/openib-cpc/ .

This commit was SVN r17138.

											
										
										
											2008-01-15 02:22:03 +03:00
+								#include "connect/connect.h"
-												Bring over the functionality from the /tmp/jnysal-openib-wireup
branch:

 * Support btl_openib_if_include and btl_openib_if_exclude MCA
   parameters, similar to those supported by other BTLs.  Each take a
   comma-delimited lists of identifiers.  Identifiers can be HCA
   interface names (e.g., ipath0, mthca1, etc.)  or an HCA interface
   name and port numbers (e.g., ipath0:1, mthca1:2, etc.).  It is an
   error to specify both _include and _exclude.  If you specify a
   non-existent (or non-ACTIVE) HCA and/or port, you'll get a warning
   unless you disable the warning by setting the MCA parameter
   btl_openib_warn_nonexistent_if to 0.
 * Start updating to use BEGIN_C_DECLS and END_C_DECLS
 * A few other minor fixes that were picked up along the way.

This commit was SVN r15063.

											
										
										
											2007-06-14 05:59:25 +04:00
+								BEGIN_C_DECLS
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
-												Bugfix for #1375.
- Adding configure options that allow to disable IB/RDMA-CM support.
- Code cleanup in openib section of configure

This commit was SVN r18830.

											
										
										
											2008-07-08 10:32:54 +04:00
+								#define HAVE_XRC (1 == OMPI_HAVE_CONNECTX_XRC)
-												Initial XRC support by Mellanox.

This commit was SVN r16787.

											
										
										
											2007-11-28 10:18:59 +03:00
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								#define MCA_BTL_IB_LEAVE_PINNED 1
-												Local GID table contains not what I thought it contains. It contains local HCA
GIDs (there can be more than one) and not GIDs of the HCA on the network. Entry
zero always has to be initialized, so we use it and warn the user if there is more
than one port active and the default subnet is configured on at least one of them.

This commit was SVN r11815.

											
										
										
											2006-09-26 16:12:33 +04:00
+								#define IB_DEFAULT_GID_PREFIX 0xfe80000000000000ll
-												Fixing openib partition support.

This commit was SVN r19705.

											
										
										
											2008-10-08 13:56:43 +04:00
+								#define MCA_BTL_IB_PKEY_MASK 0x7fff
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
-												This commit brings in two major things:

1. Galen's fine-grain control of queue pair resources in the openib
   BTL.
1. Pasha's new implementation of asynchronous HCA event handling.

Pasha's new implementation doesn't take much explanation, but the new
"multifrag" stuff does.  

Note that "svn merge" was not used to bring this new code from the
/tmp/ib_multifrag branch -- something Bad happened in the periodic
trunk pulls on that branch making an actual merge back to the trunk
effectively impossible (i.e., lots and lots of arbitrary conflicts and
artificial changes).  :-(

== Fine-grain control of queue pair resources ==

Galen's fine-grain control of queue pair resources to the OpenIB BTL
(thanks to Gleb for fixing broken code and providing additional
functionality, Pasha for finding broken code, and Jeff for doing all
the svn work and regression testing).

Prior to this commit, the OpenIB BTL created two queue pairs: one for
eager size fragments and one for max send size fragments.  When the
use of the shared receive queue (SRQ) was specified (via "-mca
btl_openib_use_srq 1"), these QPs would use a shared receive queue for
receive buffers instead of the default per-peer (PP) receive queues
and buffers.  One consequence of this design is that receive buffer
utilization (the size of the data received as a percentage of the
receive buffer used for the data) was quite poor for a number of
applications.

The new design allows multiple QPs to be specified at runtime.  Each
QP can be setup to use PP or SRQ receive buffers as well as giving
fine-grained control over receive buffer size, number of receive
buffers to post, when to replenish the receive queue (low water mark)
and for SRQ QPs, the number of outstanding sends can also be
specified.  The following is an example of the syntax to describe QPs
to the OpenIB BTL using the new MCA parameter btl_openib_receive_queues:

{{{
-mca btl_openib_receive_queues \
     "P,128,16,4;S,1024,256,128,32;S,4096,256,128,32;S,65536,256,128,32"
}}}

Each QP description is delimited by ";" (semicolon) with individual
fields of the QP description delimited by "," (comma).  The above
example therefore describes 4 QPs.

The first QP is:

    P,128,16,4

Meaning: per-peer receive buffer QPs are indicated by a starting field
of "P"; the first QP (shown above) is therefore a per-peer based QP.
The second field indicates the size of the receive buffer in bytes
(128 bytes).  The third field indicates the number of receive buffers
to allocate to the QP (16).  The fourth field indicates the low
watermark for receive buffers at which time the BTL will repost
receive buffers to the QP (4).

The second QP is:

    S,1024,256,128,32

Shared receive queue based QPs are indicated by a starting field of
"S"; the second QP (shown above) is therefore a shared receive queue
based QP.  The second, third and fourth fields are the same as in the
per-peer based QP.  The fifth field is the number of outstanding sends
that are allowed at a given time on the QP (32).  This provides a
"good enough" mechanism of flow control for some regular communication
patterns.

QPs MUST be specified in ascending receive buffer size order.  This
requirement may be removed prior to 1.3 release.

This commit was SVN r15474.

											
										
										
											2007-07-18 05:15:59 +04:00
-												Merge in /tmp-public/cpc3 branch to trunk.  oob/xoob still remains the
default CPC.

This commit was SVN r18356.

											
										
										
											2008-05-02 15:52:33 +04:00
+								/*--------------------------------------------------------------------*/
-												This is a very large change to rename several #define values from
OMPI_* to OPAL_*.  This allows opal layer to be used more independent
from the whole of ompi.

NOTE: 9 "svn mv" operations immediately follow this commit.

This commit was SVN r21180.

											
										
										
											2009-05-07 00:11:28 +04:00
+								#if OPAL_ENABLE_DEBUG
-												Merge in /tmp-public/cpc3 branch to trunk.  oob/xoob still remains the
default CPC.

This commit was SVN r18356.

											
										
										
											2008-05-02 15:52:33 +04:00
+								#define ATTACH() do { \
 								  int i = 0; \
-												Effectively revert the orte_output system and return to direct use of opal_output at all levels. Retain the orte_show_help subsystem to allow aggregation of show_help messages at the HNP.

After much work by Jeff and myself, and quite a lot of discussion, it has become clear that we simply cannot resolve the infinite loops caused by RML-involved subsystems calling orte_output. The original rationale for the change to orte_output has also been reduced by shifting the output of XML-formatted vs human readable messages to an alternative approach.

I have globally replaced the orte_output/ORTE_OUTPUT calls in the code base, as well as the corresponding .h file name. I have test compiled and run this on the various environments within my reach, so hopefully this will prove minimally disruptive.

This commit was SVN r18619.

											
										
										
											2008-06-09 18:53:58 +04:00
+								  opal_output(0, "WAITING TO DEBUG ATTACH"); \
-												Merge in /tmp-public/cpc3 branch to trunk.  oob/xoob still remains the
default CPC.

This commit was SVN r18356.

											
										
										
											2008-05-02 15:52:33 +04:00
+								  while (i == 0) sleep(5); \
 								  } while(0);
 								#else
 								#define ATTACH()
 								#endif
 								/*--------------------------------------------------------------------*/
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								/**
 								 * Infiniband (IB) BTL component.
 								 */
-												Improving support for non homogeneous OpenFabrics network configurations

This commit was SVN r22312.

											
										
										
											2009-12-15 17:25:07 +03:00
+								typedef enum {
 								    MCA_BTL_OPENIB_TRANSPORT_IB,
 								    MCA_BTL_OPENIB_TRANSPORT_IWARP,
 								    MCA_BTL_OPENIB_TRANSPORT_RDMAOE,
 								    MCA_BTL_OPENIB_TRANSPORT_UNKNOWN,
 								    MCA_BTL_OPENIB_TRANSPORT_SIZE
 								} mca_btl_openib_transport_type_t;
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								typedef enum {
-												This commit brings in two major things:

1. Galen's fine-grain control of queue pair resources in the openib
   BTL.
1. Pasha's new implementation of asynchronous HCA event handling.

Pasha's new implementation doesn't take much explanation, but the new
"multifrag" stuff does.  

Note that "svn merge" was not used to bring this new code from the
/tmp/ib_multifrag branch -- something Bad happened in the periodic
trunk pulls on that branch making an actual merge back to the trunk
effectively impossible (i.e., lots and lots of arbitrary conflicts and
artificial changes).  :-(

== Fine-grain control of queue pair resources ==

Galen's fine-grain control of queue pair resources to the OpenIB BTL
(thanks to Gleb for fixing broken code and providing additional
functionality, Pasha for finding broken code, and Jeff for doing all
the svn work and regression testing).

Prior to this commit, the OpenIB BTL created two queue pairs: one for
eager size fragments and one for max send size fragments.  When the
use of the shared receive queue (SRQ) was specified (via "-mca
btl_openib_use_srq 1"), these QPs would use a shared receive queue for
receive buffers instead of the default per-peer (PP) receive queues
and buffers.  One consequence of this design is that receive buffer
utilization (the size of the data received as a percentage of the
receive buffer used for the data) was quite poor for a number of
applications.

The new design allows multiple QPs to be specified at runtime.  Each
QP can be setup to use PP or SRQ receive buffers as well as giving
fine-grained control over receive buffer size, number of receive
buffers to post, when to replenish the receive queue (low water mark)
and for SRQ QPs, the number of outstanding sends can also be
specified.  The following is an example of the syntax to describe QPs
to the OpenIB BTL using the new MCA parameter btl_openib_receive_queues:

{{{
-mca btl_openib_receive_queues \
     "P,128,16,4;S,1024,256,128,32;S,4096,256,128,32;S,65536,256,128,32"
}}}

Each QP description is delimited by ";" (semicolon) with individual
fields of the QP description delimited by "," (comma).  The above
example therefore describes 4 QPs.

The first QP is:

    P,128,16,4

Meaning: per-peer receive buffer QPs are indicated by a starting field
of "P"; the first QP (shown above) is therefore a per-peer based QP.
The second field indicates the size of the receive buffer in bytes
(128 bytes).  The third field indicates the number of receive buffers
to allocate to the QP (16).  The fourth field indicates the low
watermark for receive buffers at which time the BTL will repost
receive buffers to the QP (4).

The second QP is:

    S,1024,256,128,32

Shared receive queue based QPs are indicated by a starting field of
"S"; the second QP (shown above) is therefore a shared receive queue
based QP.  The second, third and fourth fields are the same as in the
per-peer based QP.  The fifth field is the number of outstanding sends
that are allowed at a given time on the QP (32).  This provides a
"good enough" mechanism of flow control for some regular communication
patterns.

QPs MUST be specified in ascending receive buffer size order.  This
requirement may be removed prior to 1.3 release.

This commit was SVN r15474.

											
										
										
											2007-07-18 05:15:59 +04:00
+								    MCA_BTL_OPENIB_PP_QP,
-												Initial XRC support by Mellanox.

This commit was SVN r16787.

											
										
										
											2007-11-28 10:18:59 +03:00
+								    MCA_BTL_OPENIB_SRQ_QP,
 								    MCA_BTL_OPENIB_XRC_QP
-												This commit brings in two major things:

1. Galen's fine-grain control of queue pair resources in the openib
   BTL.
1. Pasha's new implementation of asychronous HCA event handling.

Pasha's new implementation doesn't take much explanation, but the new
"multifrag" stuff does.  

Note that "svn merge" was not used to bring this new code from the
/tmp/ib_multifrag branch -- something Bad happened in the periodic
trunk pulls on that branch making an actual merge back to the trunk
effectively impossible (i.e., lots and lots of arbitrary conflicts and
artificial changes).  :-(

== Fine-grain control of queue pair resources ==

Galen's fine-grain control of queue pair resources to the OpenIB BTL
(thanks to Gleb for fixing broken code and providing additional
functionality, Pasha for finding broken code, and Jeff for doing all
the svn work and regression testing).

Prior to this commit, the OpenIB BTL created two queue pairs: one for
eager size fragments and one for max send size fragments.  When the
use of the shared receive queue (SRQ) was specified (via "-mca
btl_openib_use_srq 1"), these QPs would use a shared receive queue for
receive buffers instead of the default per-peer (PP) receive queues
and buffers.  One consequence of this design is that receive buffer
utilization (the size of the data received as a percentage of the
receive buffer used for the data) was quite poor for a number of
applications.

The new design allows multiple QPs to be specified at runtime.  Each
QP can be setup to use PP or SRQ receive buffers as well as giving
fine-grained control over receive buffer size, number of receive
buffers to post, when to replenish the receive queue (low water mark)
and for SRQ QPs, the number of outstanding sends can also be
specified.  The following is an example of the syntax to describe QPs
to the OpenIB BTL using the new MCA parameter btl_openib_receive_queues:

{{{
-mca btl_openib_receive_queues \
     "P,128,16,4;S,1024,256,128,32;S,4096,256,128,32;S,65536,256,128,32"
}}}

Each QP description is delimited by ";" (semicolon) with individual
fields of the QP description delimited by "," (comma).  The above
example therefore describes 4 QPs.

The first QP is:

    P,128,16,4

Meaning: per-peer receive buffer QPs are indicated by a starting field
of "P"; the first QP (shown above) is therefore a per-peer based QP.
The second field indicates the size of the receive buffer in bytes
(128 bytes).  The third field indicates the number of receive buffers
to allocate to the QP (16).  The fourth field indicates the low
watermark for receive buffers at which time the BTL will repost
receive buffers to the QP (4).

The second QP is:

    S,1024,256,128,32

Shared receive queue based QPs are indicated by a starting field of
"S"; the second QP (shown above) is therefore a shared receive queue
based QP.  The second, third and fourth fields are the same as in the
per-peer based QP.  The fifth field is the number of outstanding sends
that are allowed at a given time on the QP (32).  This provides a
"good enough" mechanism of flow control for some regular communication
patterns.

QPs MUST be specified in ascending receive buffer size order.  This
requirement may be removed prior to 1.3 release.

This commit was SVN r15474.

											
										
										
											2007-07-18 05:15:59 +04:00
+								} mca_btl_openib_qp_type_t;
 								/*
 								 * Settings that exist only for per-peer (PP) queue pairs.
 								 * NOTE(review): the meaning of the fields is not shown in this part of
 								 * the file; rd_win and rd_rsv presumably hold the credit window and the
 								 * reserved receive buffer count -- confirm against the code that parses
 								 * btl_openib_receive_queues.
 								 */
 								typedef struct mca_btl_openib_pp_qp_info_t {
 								    int32_t rd_win;
 								    int32_t rd_rsv;
 								} mca_btl_openib_pp_qp_info_t;
 								struct mca_btl_openib_srq_qp_info_t {
 								    int32_t sd_max;
-												Adding support for on-demand SRQ pre-post (receive wqe allocation)

This commit was SVN r22313.

											
										
										
											2009-12-15 18:52:10 +03:00
+								    /* The init value for rd_curr_num variables of all SRQs */
 								    int32_t rd_init;
 								    /* The watermark (threshold): if the number of WQEs in the SRQ is less than this value,
 								       the SRQ limit event (IBV_EVENT_SRQ_LIMIT_REACHED) will be generated on the corresponding SRQ.
 								       As a result, the maximum number of pre-posted WQEs on the SRQ will be increased */
 								    int32_t srq_limit;
-												This commit brings in two major things:

1. Galen's fine-grain control of queue pair resources in the openib
   BTL.
1. Pasha's new implementation of asynchronous HCA event handling.

Pasha's new implementation doesn't take much explanation, but the new
"multifrag" stuff does.  

Note that "svn merge" was not used to bring this new code from the
/tmp/ib_multifrag branch -- something Bad happened in the periodic
trunk pulls on that branch making an actual merge back to the trunk
effectively impossible (i.e., lots and lots of arbitrary conflicts and
artificial changes).  :-(

== Fine-grain control of queue pair resources ==

Galen's fine-grain control of queue pair resources to the OpenIB BTL
(thanks to Gleb for fixing broken code and providing additional
functionality, Pasha for finding broken code, and Jeff for doing all
the svn work and regression testing).

Prior to this commit, the OpenIB BTL created two queue pairs: one for
eager size fragments and one for max send size fragments.  When the
use of the shared receive queue (SRQ) was specified (via "-mca
btl_openib_use_srq 1"), these QPs would use a shared receive queue for
receive buffers instead of the default per-peer (PP) receive queues
and buffers.  One consequence of this design is that receive buffer
utilization (the size of the data received as a percentage of the
receive buffer used for the data) was quite poor for a number of
applications.

The new design allows multiple QPs to be specified at runtime.  Each
QP can be setup to use PP or SRQ receive buffers as well as giving
fine-grained control over receive buffer size, number of receive
buffers to post, when to replenish the receive queue (low water mark)
and for SRQ QPs, the number of outstanding sends can also be
specified.  The following is an example of the syntax to describe QPs
to the OpenIB BTL using the new MCA parameter btl_openib_receive_queues:

{{{
-mca btl_openib_receive_queues \
     "P,128,16,4;S,1024,256,128,32;S,4096,256,128,32;S,65536,256,128,32"
}}}

Each QP description is delimited by ";" (semicolon) with individual
fields of the QP description delimited by "," (comma).  The above
example therefore describes 4 QPs.

The first QP is:

    P,128,16,4

Meaning: per-peer receive buffer QPs are indicated by a starting field
of "P"; the first QP (shown above) is therefore a per-peer based QP.
The second field indicates the size of the receive buffer in bytes
(128 bytes).  The third field indicates the number of receive buffers
to allocate to the QP (16).  The fourth field indicates the low
watermark for receive buffers at which time the BTL will repost
receive buffers to the QP (4).

The second QP is:

    S,1024,256,128,32

Shared receive queue based QPs are indicated by a starting field of
"S"; the second QP (shown above) is therefore a shared receive queue
based QP.  The second, third and fourth fields are the same as in the
per-peer based QP.  The fifth field is the number of outstanding sends
that are allowed at a given time on the QP (32).  This provides a
"good enough" mechanism of flow control for some regular communication
patterns.

QPs MUST be specified in ascending receive buffer size order.  This
requirement may be removed prior to 1.3 release.

This commit was SVN r15474.

											
										
										
											2007-07-18 05:15:59 +04:00
+								}; typedef struct mca_btl_openib_srq_qp_info_t mca_btl_openib_srq_qp_info_t;
 								struct mca_btl_openib_qp_info_t {
-												We have QP description in component structure, module structure and endpoint.
Each one of them has a field to store QP type, but this is redundant.
Store qp type only in one structure (the component one).

This commit was SVN r16272.

											
										
										
											2007-09-30 20:14:17 +04:00
+								    mca_btl_openib_qp_type_t type;
-												This commit brings in two major things:

1. Galen's fine-grain control of queue pair resources in the openib
   BTL.
1. Pasha's new implementation of asynchronous HCA event handling.

Pasha's new implementation doesn't take much explanation, but the new
"multifrag" stuff does.  

Note that "svn merge" was not used to bring this new code from the
/tmp/ib_multifrag branch -- something Bad happened in the periodic
trunk pulls on that branch making an actual merge back to the trunk
effectively impossible (i.e., lots and lots of arbitrary conflicts and
artificial changes).  :-(

== Fine-grain control of queue pair resources ==

Galen's fine-grain control of queue pair resources to the OpenIB BTL
(thanks to Gleb for fixing broken code and providing additional
functionality, Pasha for finding broken code, and Jeff for doing all
the svn work and regression testing).

Prior to this commit, the OpenIB BTL created two queue pairs: one for
eager size fragments and one for max send size fragments.  When the
use of the shared receive queue (SRQ) was specified (via "-mca
btl_openib_use_srq 1"), these QPs would use a shared receive queue for
receive buffers instead of the default per-peer (PP) receive queues
and buffers.  One consequence of this design is that receive buffer
utilization (the size of the data received as a percentage of the
receive buffer used for the data) was quite poor for a number of
applications.

The new design allows multiple QPs to be specified at runtime.  Each
QP can be setup to use PP or SRQ receive buffers as well as giving
fine-grained control over receive buffer size, number of receive
buffers to post, when to replenish the receive queue (low water mark)
and for SRQ QPs, the number of outstanding sends can also be
specified.  The following is an example of the syntax to describe QPs
to the OpenIB BTL using the new MCA parameter btl_openib_receive_queues:

{{{
-mca btl_openib_receive_queues \
     "P,128,16,4;S,1024,256,128,32;S,4096,256,128,32;S,65536,256,128,32"
}}}

Each QP description is delimited by ";" (semicolon) with individual
fields of the QP description delimited by "," (comma).  The above
example therefore describes 4 QPs.

The first QP is:

    P,128,16,4

Meaning: per-peer receive buffer QPs are indicated by a starting field
of "P"; the first QP (shown above) is therefore a per-peer based QP.
The second field indicates the size of the receive buffer in bytes
(128 bytes).  The third field indicates the number of receive buffers
to allocate to the QP (16).  The fourth field indicates the low
watermark for receive buffers at which time the BTL will repost
receive buffers to the QP (4).

The second QP is:

    S,1024,256,128,32

Shared receive queue based QPs are indicated by a starting field of
"S"; the second QP (shown above) is therefore a shared receive queue
based QP.  The second, third and fourth fields are the same as in the
per-peer based QP.  The fifth field is the number of outstanding sends
that are allowed at a given time on the QP (32).  This provides a
"good enough" mechanism of flow control for some regular communication
patterns.

QPs MUST be specified in ascending receive buffer size order.  This
requirement may be removed prior to 1.3 release.

This commit was SVN r15474.

											
										
										
											2007-07-18 05:15:59 +04:00
+								    size_t size;
 								    int32_t rd_num;
 								    int32_t rd_low;
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								    union {
-												This commit brings in two major things:

1. Galen's fine-grain control of queue pair resources in the openib
   BTL.
1. Pasha's new implementation of asynchronous HCA event handling.

Pasha's new implementation doesn't take much explanation, but the new
"multifrag" stuff does.  

Note that "svn merge" was not used to bring this new code from the
/tmp/ib_multifrag branch -- something Bad happened in the periodic
trunk pulls on that branch making an actual merge back to the trunk
effectively impossible (i.e., lots and lots of arbitrary conflicts and
artificial changes).  :-(

== Fine-grain control of queue pair resources ==

Galen's fine-grain control of queue pair resources to the OpenIB BTL
(thanks to Gleb for fixing broken code and providing additional
functionality, Pasha for finding broken code, and Jeff for doing all
the svn work and regression testing).

Prior to this commit, the OpenIB BTL created two queue pairs: one for
eager size fragments and one for max send size fragments.  When the
use of the shared receive queue (SRQ) was specified (via "-mca
btl_openib_use_srq 1"), these QPs would use a shared receive queue for
receive buffers instead of the default per-peer (PP) receive queues
and buffers.  One consequence of this design is that receive buffer
utilization (the size of the data received as a percentage of the
receive buffer used for the data) was quite poor for a number of
applications.

The new design allows multiple QPs to be specified at runtime.  Each
QP can be setup to use PP or SRQ receive buffers as well as giving
fine-grained control over receive buffer size, number of receive
buffers to post, when to replenish the receive queue (low water mark)
and for SRQ QPs, the number of outstanding sends can also be
specified.  The following is an example of the syntax to describe QPs
to the OpenIB BTL using the new MCA parameter btl_openib_receive_queues:

{{{
-mca btl_openib_receive_queues \
     "P,128,16,4;S,1024,256,128,32;S,4096,256,128,32;S,65536,256,128,32"
}}}

Each QP description is delimited by ";" (semicolon) with individual
fields of the QP description delimited by "," (comma).  The above
example therefore describes 4 QPs.

The first QP is:

    P,128,16,4

Meaning: per-peer receive buffer QPs are indicated by a starting field
of "P"; the first QP (shown above) is therefore a per-peer based QP.
The second field indicates the size of the receive buffer in bytes
(128 bytes).  The third field indicates the number of receive buffers
to allocate to the QP (16).  The fourth field indicates the low
watermark for receive buffers at which time the BTL will repost
receive buffers to the QP (4).

The second QP is:

    S,1024,256,128,32

Shared receive queue based QPs are indicated by a starting field of
"S"; the second QP (shown above) is therefore a shared receive queue
based QP.  The second, third and fourth fields are the same as in the
per-peer based QP.  The fifth field is the number of outstanding sends
that are allowed at a given time on the QP (32).  This provides a
"good enough" mechanism of flow control for some regular communication
patterns.

QPs MUST be specified in ascending receive buffer size order.  This
requirement may be removed prior to 1.3 release.

This commit was SVN r15474.

											
										
										
											2007-07-18 05:15:59 +04:00
+								        mca_btl_openib_pp_qp_info_t pp_qp;
 								        mca_btl_openib_srq_qp_info_t srq_qp;
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								    } u;
-												This commit brings in two major things:

1. Galen's fine-grain control of queue pair resources in the openib
   BTL.
1. Pasha's new implementation of asynchronous HCA event handling.

Pasha's new implementation doesn't take much explanation, but the new
"multifrag" stuff does.  

Note that "svn merge" was not used to bring this new code from the
/tmp/ib_multifrag branch -- something Bad happened in the periodic
trunk pulls on that branch making an actual merge back to the trunk
effectively impossible (i.e., lots and lots of arbitrary conflicts and
artificial changes).  :-(

== Fine-grain control of queue pair resources ==

Galen's fine-grain control of queue pair resources to the OpenIB BTL
(thanks to Gleb for fixing broken code and providing additional
functionality, Pasha for finding broken code, and Jeff for doing all
the svn work and regression testing).

Prior to this commit, the OpenIB BTL created two queue pairs: one for
eager size fragments and one for max send size fragments.  When the
use of the shared receive queue (SRQ) was specified (via "-mca
btl_openib_use_srq 1"), these QPs would use a shared receive queue for
receive buffers instead of the default per-peer (PP) receive queues
and buffers.  One consequence of this design is that receive buffer
utilization (the size of the data received as a percentage of the
receive buffer used for the data) was quite poor for a number of
applications.

The new design allows multiple QPs to be specified at runtime.  Each
QP can be setup to use PP or SRQ receive buffers as well as giving
fine-grained control over receive buffer size, number of receive
buffers to post, when to replenish the receive queue (low water mark)
and for SRQ QPs, the number of outstanding sends can also be
specified.  The following is an example of the syntax to describe QPs
to the OpenIB BTL using the new MCA parameter btl_openib_receive_queues:

{{{
-mca btl_openib_receive_queues \
     "P,128,16,4;S,1024,256,128,32;S,4096,256,128,32;S,65536,256,128,32"
}}}

Each QP description is delimited by ";" (semicolon) with individual
fields of the QP description delimited by "," (comma).  The above
example therefore describes 4 QPs.

The first QP is:

    P,128,16,4

Meaning: per-peer receive buffer QPs are indicated by a starting field
of "P"; the first QP (shown above) is therefore a per-peer based QP.
The second field indicates the size of the receive buffer in bytes
(128 bytes).  The third field indicates the number of receive buffers
to allocate to the QP (16).  The fourth field indicates the low
watermark for receive buffers at which time the BTL will repost
receive buffers to the QP (4).

The second QP is:

    S,1024,256,128,32

Shared receive queue based QPs are indicated by a starting field of
"S"; the second QP (shown above) is therefore a shared receive queue
based QP.  The second, third and fourth fields are the same as in the
per-peer based QP.  The fifth field is the number of outstanding sends
that are allowed at a given time on the QP (32).  This provides a
"good enough" mechanism of flow control for some regular communication
patterns.

QPs MUST be specified in ascending receive buffer size order.  This
requirement may be removed prior to 1.3 release.

This commit was SVN r15474.

											
										
										
											2007-07-18 05:15:59 +04:00
+								}; typedef struct mca_btl_openib_qp_info_t mca_btl_openib_qp_info_t;
-												We have QP description in component structure, module structure and endpoint.
Each one of them has a field to store QP type, but this is redundant.
Store qp type only in one structure (the component one).

This commit was SVN r16272.

											
										
										
											2007-09-30 20:14:17 +04:00
+								#define BTL_OPENIB_QP_TYPE(Q) (mca_btl_openib_component.qp_infos[(Q)].type) /* type of QP number Q, read from the component-level qp_infos[] table */
 								/* Evaluates to non-zero when QP number Q has type MCA_BTL_OPENIB_PP_QP. */
 								#define BTL_OPENIB_QP_TYPE_PP(Q) (BTL_OPENIB_QP_TYPE(Q) == MCA_BTL_OPENIB_PP_QP)
 								/* Evaluates to non-zero when QP number Q has type MCA_BTL_OPENIB_SRQ_QP. */
 								#define BTL_OPENIB_QP_TYPE_SRQ(Q) (BTL_OPENIB_QP_TYPE(Q) == MCA_BTL_OPENIB_SRQ_QP)
-												Initial XRC support by Mellanox.

This commit was SVN r16787.

											
										
										
											2007-11-28 10:18:59 +03:00
+								#define BTL_OPENIB_QP_TYPE_XRC(Q) \
 								    (BTL_OPENIB_QP_TYPE(Q) == MCA_BTL_OPENIB_XRC_QP) /* non-zero when QP number Q has type MCA_BTL_OPENIB_XRC_QP */
-												This commit brings in two major things:

1. Galen's fine-grain control of queue pair resources in the openib
   BTL.
1. Pasha's new implementation of asynchronous HCA event handling.

Pasha's new implementation doesn't take much explanation, but the new
"multifrag" stuff does.  

Note that "svn merge" was not used to bring this new code from the
/tmp/ib_multifrag branch -- something Bad happened in the periodic
trunk pulls on that branch making an actual merge back to the trunk
effectively impossible (i.e., lots and lots of arbitrary conflicts and
artificial changes).  :-(

== Fine-grain control of queue pair resources ==

Galen's fine-grain control of queue pair resources to the OpenIB BTL
(thanks to Gleb for fixing broken code and providing additional
functionality, Pasha for finding broken code, and Jeff for doing all
the svn work and regression testing).

Prior to this commit, the OpenIB BTL created two queue pairs: one for
eager size fragments and one for max send size fragments.  When the
use of the shared receive queue (SRQ) was specified (via "-mca
btl_openib_use_srq 1"), these QPs would use a shared receive queue for
receive buffers instead of the default per-peer (PP) receive queues
and buffers.  One consequence of this design is that receive buffer
utilization (the size of the data received as a percentage of the
receive buffer used for the data) was quite poor for a number of
applications.

The new design allows multiple QPs to be specified at runtime.  Each
QP can be setup to use PP or SRQ receive buffers as well as giving
fine-grained control over receive buffer size, number of receive
buffers to post, when to replenish the receive queue (low water mark)
and for SRQ QPs, the number of outstanding sends can also be
specified.  The following is an example of the syntax to describe QPs
to the OpenIB BTL using the new MCA parameter btl_openib_receive_queues:

{{{
-mca btl_openib_receive_queues \
     "P,128,16,4;S,1024,256,128,32;S,4096,256,128,32;S,65536,256,128,32"
}}}

Each QP description is delimited by ";" (semicolon) with individual
fields of the QP description delimited by "," (comma).  The above
example therefore describes 4 QPs.

The first QP is:

    P,128,16,4

Meaning: per-peer receive buffer QPs are indicated by a starting field
of "P"; the first QP (shown above) is therefore a per-peer based QP.
The second field indicates the size of the receive buffer in bytes
(128 bytes).  The third field indicates the number of receive buffers
to allocate to the QP (16).  The fourth field indicates the low
watermark for receive buffers at which time the BTL will repost
receive buffers to the QP (4).

The second QP is:

    S,1024,256,128,32

Shared receive queue based QPs are indicated by a starting field of
"S"; the second QP (shown above) is therefore a shared receive queue
based QP.  The second, third and fourth fields are the same as in the
per-peer based QP.  The fifth field is the number of outstanding sends
that are allowed at a given time on the QP (32).  This provides a
"good enough" mechanism of flow control for some regular communication
patterns.

QPs MUST be specified in ascending receive buffer size order.  This
requirement may be removed prior to 1.3 release.

This commit was SVN r15474.

											
										
										
											2007-07-18 05:15:59 +04:00
-												Fixes trac:1285.  Really.

This commit has the same commit message as r18450, but without the
extra bonus memory corruption that was introduced.

This commit was SVN r18467.

The following SVN revision numbers were found above:
  r18450 --> open-mpi/ompi@5295902ebec36d49c92edcd3631978d822a107cc

The following Trac tickets were found above:
  Ticket 1285 --> https://svn.open-mpi.org/trac/ompi/ticket/1285

											
										
										
											2008-05-21 01:53:42 +04:00
+								typedef enum {
 								    BTL_OPENIB_RQ_SOURCE_DEFAULT,
 								    BTL_OPENIB_RQ_SOURCE_MCA,
-												Fixes trac:1295: change language in openib BTL from IB-specific to be
"!OpenFabrics" / neutral (i.e., refer to IB and/or iWARP).

 * Mostly just type, variable/field, and function name changes, such as
   s/hca/device/g, etc.  
 * Changed the INI file for the hardware-specific parameters to be
   mca-btl-openib-device-params.ini.
 * Updated a lot of help messages in the help-*.txt files, not just to
   update it to be !OpenFabrics/neutral language, but also for some
   consistency of tone, indenting, etc.
 * Deprecated a bunch of MCA params in favor of language-neutral new
   ones:
   * btl_openib_warn_no_hca_params_found (s/hca/device/)
   * btl_openib_hca_param_files
   * btl_openib_ib_cq_size (s/_ib_/_of_/)
   * btl_openib_ib_max_inline_data
   * btl_openib_ib_psn
   * btl_openib_ib_mtu
   * btl_openib_ib_pkey_ix
   * btl_openib_ib_pkey_val

This commit was SVN r18985.

The following Trac tickets were found above:
  Ticket 1295 --> https://svn.open-mpi.org/trac/ompi/ticket/1295

											
										
										
											2008-07-23 04:28:59 +04:00
+								    BTL_OPENIB_RQ_SOURCE_DEVICE_INI,
 								    BTL_OPENIB_RQ_SOURCE_MAX
-												Fixes trac:1285.  Really.

This commit has the same commit message as r18450, but without the
extra bonus memory corruption that was introduced.

This commit was SVN r18467.

The following SVN revision numbers were found above:
  r18450 --> open-mpi/ompi@5295902ebec36d49c92edcd3631978d822a107cc

The following Trac tickets were found above:
  Ticket 1285 --> https://svn.open-mpi.org/trac/ompi/ticket/1285

											
										
										
											2008-05-21 01:53:42 +04:00
+								} btl_openib_receive_queues_source_t;
-												Fixes trac:1210, #1319

Commit from a long-standing Mercurial tree that ended up incorporating a lot of things:

 * A few fixes for CPC interface changes in all the CPCs
 * Attempts (but not yet finished) to fix shutdown problems in the IB CM CPC
 * #1319: add CTS support (i.e., initiator guarantees to send first message; automatically activated for iWARP over the RDMA CM CPC)
   * Some variable and function renamings to make this be generic (e.g., alloc_credit_frag became alloc_control_frag)
   * CPCs no longer post receive buffers; they only post a single receive buffer for the CTS if they use CTS. Instead, the main BTL now posts the main sets of receive buffers. 
   * CPCs allocate a CTS buffer only if they're about to make a connection
 * RDMA CM improvements:
   * Use threaded mode openib fd monitoring to wait for RDMA CM events
   * Synchronize endpoint finalization and disconnection between main thread and service thread to avoid/fix some race conditions
   * Converted several structs to be OBJs so that we can use reference counting to know when to invoke destructors
   * Make some new OBJ's have opal_list_item_t's as their base, thereby eliminating the need for the local list_item_t type
   * Renamed many variables to be internally consistent
   * Centralize the decision in an inline function as to whether this process or the remote process is supposed to be the initiator
   * Add oodles of OPAL_OUTPUT statements for debugging (hard-wired to output stream -1; to be activated by developers if they want/need them) 
   * Use rdma_create_qp() instead of ibv_create_qp()
 * openib fd monitoring improvements:
   * Renamed a bunch of functions and variables to be a little more obvious as to their true function
   * Use pipes to communicate between main thread and service thread
   * Add ability for main thread to invoke a function back on the service thread 
   * Ensure to set initiator_depth and responder_resources properly, by putting max_qp_rd_atom and max_qp_init_rd_atom in the modex (see rdma_connect(3))
   * Ensure to set the source IP address in rdma_resolve() to ensure that we select the correct OpenFabrics source port
   * Make new MCA param: openib_btl_connect_rdmacm_resolve_timeout
 * Other improvements:
   * btl_openib_device_type MCA param: can be "iw" or "ib" or "all" (or "infiniband" or "iwarp")
   * Somewhat improved error handling
   * Bunches of spelling fixes in comments, VERBOSE, and OUTPUT statements
   * Oodles of little coding style fixes
   * Changed shutdown ordering of btl; the device is now an OBJ with ref counting for destruction
   * Added some more show_help error messages
   * Change configury to only build IBCM / RDMACM if we have threads (because we need a progress thread) 

This commit was SVN r19686.

The following Trac tickets were found above:
  Ticket 1210 --> https://svn.open-mpi.org/trac/ompi/ticket/1210

											
										
										
											2008-10-06 04:46:02 +04:00
+								typedef enum {
 								    /* Device-type selector.  NOTE(review): presumably backs the
 								       btl_openib_device_type MCA parameter, which accepts "ib" /
 								       "infiniband", "iw" / "iwarp" or "all" -- confirm where the
 								       parameter is registered. */
 								    /* InfiniBand devices */
 								    BTL_OPENIB_DT_IB,
 								    /* iWARP devices */
 								    BTL_OPENIB_DT_IWARP,
 								    /* both kinds of device */
 								    BTL_OPENIB_DT_ALL
 								} btl_openib_device_type_t;
-												Adding a hash table for management dependences between SRQs and their BTL modules.

This commit was SVN r22653.

											
										
										
											2010-02-18 12:48:16 +03:00
+								#if OPAL_HAVE_THREADS
 								/* Structure for managing all BTL SRQs; only declared when
 								   OPAL_HAVE_THREADS is non-zero */
 								typedef struct mca_btl_openib_srq_manager_t {
 								    /* NOTE(review): presumably protects srq_addr_table -- confirm
 								       at the places where the table is accessed */
 								    opal_mutex_t lock;
 								    /* The keys of this hash table are the addresses of
 								       SRQ structures, and the values are pointers to the
 								       BTL modules that are associated with those SRQs */
 								    opal_hash_table_t srq_addr_table;
 								} mca_btl_openib_srq_manager_t;
 								#endif
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								struct mca_btl_openib_component_t {
-												Fixes trac:1392, #1400

 * add "register" function to mca_base_component_t
   * converted coll:basic and paffinity:linux and paffinity:solaris to
     use this function
   * we'll convert the rest over time (I'll file a ticket once all
     this is committed)
 * add 32 bytes of "reserved" space to the end of mca_base_component_t
   and mca_base_component_data_2_0_0_t to make future upgrades
   [slightly] easier
   * new mca_base_component_t size: 196 bytes
   * new mca_base_component_data_2_0_0_t size: 36 bytes
 * MCA base version bumped to v2.0
   * '''We now refuse to load components that are not MCA v2.0.x'''
 * all MCA frameworks versions bumped to v2.0
 * be a little more explicit about version numbers in the MCA base
   * add big comment in mca.h about versioning philosophy

This commit was SVN r19073.

The following Trac tickets were found above:
  Ticket 1392 --> https://svn.open-mpi.org/trac/ompi/ticket/1392

											
										
										
											2008-07-29 02:40:57 +04:00
+								    mca_btl_base_component_2_0_0_t          super;  /**< base BTL component */
-												Add max_btls option 

This commit was SVN r9263.

											
										
										
											2006-03-13 20:03:21 +03:00
 								    int                                ib_max_btls;
-												Fixes trac:1295: change language in openib BTL from IB-specific to be
"!OpenFabrics" / neutral (i.e., refer to IB and/or iWARP).

 * Mostly just type, variable/field, and function name changes, such as
   s/hca/device/g, etc.  
 * Changed the INI file for the hardware-specific parameters to be
   mca-btl-openib-device-params.ini.
 * Updated a lot of help messages in the help-*.txt files, not just to
   update it to be !OpenFabrics/neutral language, but also for some
   consistency of tone, indenting, etc.
 * Deprecated a bunch of MCA params in favor of language-neutral new
   ones:
   * btl_openib_warn_no_hca_params_found (s/hca/device/)
   * btl_openib_hca_param_files
   * btl_openib_ib_cq_size (s/_ib_/_of_/)
   * btl_openib_ib_max_inline_data
   * btl_openib_ib_psn
   * btl_openib_ib_mtu
   * btl_openib_ib_pkey_ix
   * btl_openib_ib_pkey_val

This commit was SVN r18985.

The following Trac tickets were found above:
  Ticket 1295 --> https://svn.open-mpi.org/trac/ompi/ticket/1295

											
										
										
											2008-07-23 04:28:59 +04:00
+								    /**< maximum number of devices available to openib component */
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
-												Fix to allow eager limit and max send size to be any size (within resource limitations). Instead of storing the ompi_free_list_t * in the fragment, we use the frag type enum, this tells us where the frag came from and where it should return.. This could also be done in mvapi but is not a high priority moving forward.. 

Review by Brian, needs to hit the trunk + 1.1 release.. 

This commit was SVN r10157.

											
										
										
											2006-06-01 06:32:18 +04:00
+								    int                                ib_num_btls;
-												Fixes trac:1295: change language in openib BTL from IB-specific to be
"!OpenFabrics" / neutral (i.e., refer to IB and/or iWARP).

 * Mostly just type, variable/field, and function name changes, such as
   s/hca/device/g, etc.  
 * Changed the INI file for the hardware-specific parameters to be
   mca-btl-openib-device-params.ini.
 * Updated a lot of help messages in the help-*.txt files, not just to
   update it to be !OpenFabrics/neutral language, but also for some
   consistency of tone, indenting, etc.
 * Deprecated a bunch of MCA params in favor of language-neutral new
   ones:
   * btl_openib_warn_no_hca_params_found (s/hca/device/)
   * btl_openib_hca_param_files
   * btl_openib_ib_cq_size (s/_ib_/_of_/)
   * btl_openib_ib_max_inline_data
   * btl_openib_ib_psn
   * btl_openib_ib_mtu
   * btl_openib_ib_pkey_ix
   * btl_openib_ib_pkey_val

This commit was SVN r18985.

The following Trac tickets were found above:
  Ticket 1295 --> https://svn.open-mpi.org/trac/ompi/ticket/1295

											
										
										
											2008-07-23 04:28:59 +04:00
+								    /**< number of devices available to the openib component */
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
-												Adding:
* openib_finalize flow for openib btl
* async event handler for openib btl

This commit was SVN r14623.

											
										
										
											2007-05-09 01:47:21 +04:00
+								    struct mca_btl_openib_module_t             **openib_btls;
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
+								    /**< array of available BTLs */
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
-												Fixes trac:1295: change language in openib BTL from IB-specific to be
"!OpenFabrics" / neutral (i.e., refer to IB and/or iWARP).

 * Mostly just type, variable/field, and function name changes, such as
   s/hca/device/g, etc.  
 * Changed the INI file for the hardware-specific parameters to be
   mca-btl-openib-device-params.ini.
 * Updated a lot of help messages in the help-*.txt files, not just to
   update it to be !OpenFabrics/neutral language, but also for some
   consistency of tone, indenting, etc.
 * Deprecated a bunch of MCA params in favor of language-neutral new
   ones:
   * btl_openib_warn_no_hca_params_found (s/hca/device/)
   * btl_openib_hca_param_files
   * btl_openib_ib_cq_size (s/_ib_/_of_/)
   * btl_openib_ib_max_inline_data
   * btl_openib_ib_psn
   * btl_openib_ib_mtu
   * btl_openib_ib_pkey_ix
   * btl_openib_ib_pkey_val

This commit was SVN r18985.

The following Trac tickets were found above:
  Ticket 1295 --> https://svn.open-mpi.org/trac/ompi/ticket/1295

											
										
										
											2008-07-23 04:28:59 +04:00
+								    opal_pointer_array_t devices; /**< array of available devices */
 								    int devices_count;
-												ompi_pointer_array_get_size doesn't return how much elements are actually in an
array, so count them by ourselves.

This commit was SVN r15943.

											
										
										
											2007-08-22 13:31:12 +04:00
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								    int ib_free_list_num;
 								    /**< initial size of free lists */
 								    int ib_free_list_max;
 								    /**< maximum size of free lists */
 								    int ib_free_list_inc;
 								    /**< number of elements to alloc when growing free lists */
-												* rename ompi_list to opal_list

This commit was SVN r6322.

											
										
										
											2005-07-03 20:22:16 +04:00
+								    opal_list_t                             ib_procs;
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								    /**< list of ib proc structures */
-												* rename ompi_event to opal_event

This commit was SVN r6328.

											
										
										
											2005-07-04 03:09:55 +04:00
+								    opal_event_t                            ib_send_event;
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								    /**< event structure for sends */
-												* rename ompi_event to opal_event

This commit was SVN r6328.

											
										
										
											2005-07-04 03:09:55 +04:00
+								    opal_event_t                            ib_recv_event;
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								    /**< event structure for recvs */
-												* rename locking code from ompi to opal

This commit was SVN r6327.

											
										
										
											2005-07-04 02:45:48 +04:00
+								    opal_mutex_t                            ib_lock;
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								    /**< lock for accessing module state */
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								    char* ib_mpool_name;
 								    /**< name of ib memory pool */
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
-												This commit brings in two major things:

1. Galen's fine-grain control of queue pair resources in the openib
   BTL.
1. Pasha's new implementation of asynchronous HCA event handling.

Pasha's new implementation doesn't take much explanation, but the new
"multifrag" stuff does.  

Note that "svn merge" was not used to bring this new code from the
/tmp/ib_multifrag branch -- something Bad happened in the periodic
trunk pulls on that branch making an actual merge back to the trunk
effectively impossible (i.e., lots and lots of arbitrary conflicts and
artificial changes).  :-(

== Fine-grain control of queue pair resources ==

Galen's fine-grain control of queue pair resources to the OpenIB BTL
(thanks to Gleb for fixing broken code and providing additional
functionality, Pasha for finding broken code, and Jeff for doing all
the svn work and regression testing).

Prior to this commit, the OpenIB BTL created two queue pairs: one for
eager size fragments and one for max send size fragments.  When the
use of the shared receive queue (SRQ) was specified (via "-mca
btl_openib_use_srq 1"), these QPs would use a shared receive queue for
receive buffers instead of the default per-peer (PP) receive queues
and buffers.  One consequence of this design is that receive buffer
utilization (the size of the data received as a percentage of the
receive buffer used for the data) was quite poor for a number of
applications.

The new design allows multiple QPs to be specified at runtime.  Each
QP can be setup to use PP or SRQ receive buffers as well as giving
fine-grained control over receive buffer size, number of receive
buffers to post, when to replenish the receive queue (low water mark)
and for SRQ QPs, the number of outstanding sends can also be
specified.  The following is an example of the syntax to describe QPs
to the OpenIB BTL using the new MCA parameter btl_openib_receive_queues:

{{{
-mca btl_openib_receive_queues \
     "P,128,16,4;S,1024,256,128,32;S,4096,256,128,32;S,65536,256,128,32"
}}}

Each QP description is delimited by ";" (semicolon) with individual
fields of the QP description delimited by "," (comma).  The above
example therefore describes 4 QPs.

The first QP is:

    P,128,16,4

Meaning: per-peer receive buffer QPs are indicated by a starting field
of "P"; the first QP (shown above) is therefore a per-peer based QP.
The second field indicates the size of the receive buffer in bytes
(128 bytes).  The third field indicates the number of receive buffers
to allocate to the QP (16).  The fourth field indicates the low
watermark for receive buffers at which time the BTL will repost
receive buffers to the QP (4).

The second QP is:

    S,1024,256,128,32

Shared receive queue based QPs are indicated by a starting field of
"S"; the second QP (shown above) is therefore a shared receive queue
based QP.  The second, third and fourth fields are the same as in the
per-peer based QP.  The fifth field is the number of outstanding sends
that are allowed at a given time on the QP (32).  This provides a
"good enough" mechanism of flow control for some regular communication
patterns.

QPs MUST be specified in ascending receive buffer size order.  This
requirement may be removed prior to 1.3 release.

This commit was SVN r15474.

											
										
										
											2007-07-18 05:15:59 +04:00
+								    uint8_t num_pp_qps;          /**< number of pp qp's */
 								    uint8_t num_srq_qps;         /**< number of srq qp's */
-												Initial XRC support by Mellanox.

This commit was SVN r16787.

											
										
										
											2007-11-28 10:18:59 +03:00
+								    uint8_t num_xrc_qps;         /**< number of xrc qp's */
-												This commit brings in two major things:

1. Galen's fine-grain control of queue pair resources in the openib
   BTL.
1. Pasha's new implementation of asynchronous HCA event handling.

Pasha's new implementation doesn't take much explanation, but the new
"multifrag" stuff does.  

Note that "svn merge" was not used to bring this new code from the
/tmp/ib_multifrag branch -- something Bad happened in the periodic
trunk pulls on that branch making an actual merge back to the trunk
effectively impossible (i.e., lots and lots of arbitrary conflicts and
artificial changes).  :-(

== Fine-grain control of queue pair resources ==

Galen's fine-grain control of queue pair resources to the OpenIB BTL
(thanks to Gleb for fixing broken code and providing additional
functionality, Pasha for finding broken code, and Jeff for doing all
the svn work and regression testing).

Prior to this commit, the OpenIB BTL created two queue pairs: one for
eager size fragments and one for max send size fragments.  When the
use of the shared receive queue (SRQ) was specified (via "-mca
btl_openib_use_srq 1"), these QPs would use a shared receive queue for
receive buffers instead of the default per-peer (PP) receive queues
and buffers.  One consequence of this design is that receive buffer
utilization (the size of the data received as a percentage of the
receive buffer used for the data) was quite poor for a number of
applications.

The new design allows multiple QPs to be specified at runtime.  Each
QP can be setup to use PP or SRQ receive buffers as well as giving
fine-grained control over receive buffer size, number of receive
buffers to post, when to replenish the receive queue (low water mark)
and for SRQ QPs, the number of outstanding sends can also be
specified.  The following is an example of the syntax to describe QPs
to the OpenIB BTL using the new MCA parameter btl_openib_receive_queues:

{{{
-mca btl_openib_receive_queues \
     "P,128,16,4;S,1024,256,128,32;S,4096,256,128,32;S,65536,256,128,32"
}}}

Each QP description is delimited by ";" (semicolon) with individual
fields of the QP description delimited by "," (comma).  The above
example therefore describes 4 QPs.

The first QP is:

    P,128,16,4

Meaning: per-peer receive buffer QPs are indicated by a starting field
of "P"; the first QP (shown above) is therefore a per-peer based QP.
The second field indicates the size of the receive buffer in bytes
(128 bytes).  The third field indicates the number of receive buffers
to allocate to the QP (16).  The fourth field indicates the low
watermark for receive buffers at which time the BTL will repost
receive buffers to the QP (4).

The second QP is:

    S,1024,256,128,32

Shared receive queue based QPs are indicated by a starting field of
"S"; the second QP (shown above) is therefore a shared receive queue
based QP.  The second, third and fourth fields are the same as in the
per-peer based QP.  The fifth field is the number of outstanding sends
that are allowed at a given time on the QP (32).  This provides a
"good enough" mechanism of flow control for some regular communication
patterns.

QPs MUST be specified in ascending receive buffer size order.  This
requirement may be removed prior to 1.3 release.

This commit was SVN r15474.

											
										
										
											2007-07-18 05:15:59 +04:00
+								    uint8_t num_qps;             /**< total number of qp's */
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
-												Initial XRC support by Mellanox.

This commit was SVN r16787.

											
										
										
											2007-11-28 10:18:59 +03:00
+								    opal_hash_table_t ib_addr_table; /**< used only for xrc.hash-table that
 								                                       keeps table of all lids/subnets */
-												This commit brings in two major things:

1. Galen's fine-grain control of queue pair resources in the openib
   BTL.
1. Pasha's new implementation of asychronous HCA event handling.

1. Pasha's new implementation of asynchronous HCA event handling.
"multifrag" stuff does.  

Note that "svn merge" was not used to bring this new code from the
/tmp/ib_multifrag branch -- something Bad happened in the periodic
trunk pulls on that branch making an actual merge back to the trunk
effectively impossible (i.e., lots and lots of arbitrary conflicts and
artificial changes).  :-(

== Fine-grain control of queue pair resources ==

Galen's fine-grain control of queue pair resources to the OpenIB BTL
(thanks to Gleb for fixing broken code and providing additional
functionality, Pasha for finding broken code, and Jeff for doing all
the svn work and regression testing).

Prior to this commit, the OpenIB BTL created two queue pairs: one for
eager size fragments and one for max send size fragments.  When the
use of the shared receive queue (SRQ) was specified (via "-mca
btl_openib_use_srq 1"), these QPs would use a shared receive queue for
receive buffers instead of the default per-peer (PP) receive queues
and buffers.  One consequence of this design is that receive buffer
utilization (the size of the data received as a percentage of the
receive buffer used for the data) was quite poor for a number of
applications.

The new design allows multiple QPs to be specified at runtime.  Each
QP can be setup to use PP or SRQ receive buffers as well as giving
fine-grained control over receive buffer size, number of receive
buffers to post, when to replenish the receive queue (low water mark)
and for SRQ QPs, the number of outstanding sends can also be
specified.  The following is an example of the syntax to describe QPs
to the OpenIB BTL using the new MCA parameter btl_openib_receive_queues:

{{{
-mca btl_openib_receive_queues \
     "P,128,16,4;S,1024,256,128,32;S,4096,256,128,32;S,65536,256,128,32"
}}}

Each QP description is delimited by ";" (semicolon) with individual
fields of the QP description delimited by "," (comma).  The above
example therefore describes 4 QPs.

The first QP is:

    P,128,16,4

Meaning: per-peer receive buffer QPs are indicated by a starting field
of "P"; the first QP (shown above) is therefore a per-peer based QP.
The second field indicates the size of the receive buffer in bytes
(128 bytes).  The third field indicates the number of receive buffers
to allocate to the QP (16).  The fourth field indicates the low
watermark for receive buffers at which time the BTL will repost
receive buffers to the QP (4).

The second QP is:

    S,1024,256,128,32

Shared receive queue based QPs are indicated by a starting field of
"S"; the second QP (shown above) is therefore a shared receive queue
based QP.  The second, third and fourth fields are the same as in the
per-peer based QP.  The fifth field is the number of outstanding sends
that are allowed at a given time on the QP (32).  This provides a
"good enough" mechanism of flow control for some regular communication
patterns.

QPs MUST be specified in ascending receive buffer size order.  This
requirement may be removed prior to 1.3 release.

This commit was SVN r15474.

											
										
										
											2007-07-18 05:15:59 +04:00
+								    mca_btl_openib_qp_info_t* qp_infos;
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
+								    size_t eager_limit;      /**< Eager send limit of first fragment, in Bytes */
 								    size_t max_send_size;    /**< Maximum send size, in Bytes */
-												cosmetic fixes in openib btl:
* replace tabs with ws
* remove unnecessary casting
* use proper escape codes for printf() like functions

This commit was SVN r24445.

											
										
										
											2011-02-23 18:50:37 +03:00
+								    uint32_t max_hw_msg_size;/**< Maximum message size for RDMA protocols in Bytes */
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
+								    uint32_t reg_mru_len;    /**< Length of the registration cache most recently used list */
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								    uint32_t use_srq;        /**< Use the Shared Receive Queue (SRQ mode) */
 								    uint32_t ib_cq_size[2];  /**< Max outstanding CQE on the CQ */
-												Fixes trac:1355: allow INI file to set max_inline_data vale, and if not
specified, probe for max value supported by device.

This commit was SVN r18720.

The following Trac tickets were found above:
  Ticket 1355 --> https://svn.open-mpi.org/trac/ompi/ticket/1355

											
										
										
											2008-06-24 21:18:07 +04:00
+								    int32_t ib_max_inline_data; /**< Max size of inline data */
-												Add pkey value MCA parameter. if this param is used,
only ports with the actual pkey value will be initialized.

This commit was SVN r14463.

											
										
										
											2007-04-22 14:22:12 +04:00
+								    uint32_t ib_pkey_val;
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								    uint32_t ib_psn;
 								    uint32_t ib_qp_ous_rd_atom;
 								    uint32_t ib_mtu;
 								    uint32_t ib_min_rnr_timer;
 								    uint32_t ib_timeout;
 								    uint32_t ib_retry_count;
 								    uint32_t ib_rnr_retry;
 								    uint32_t ib_max_rdma_dst_ops;
 								    uint32_t ib_service_level;
-												Revert r24533 and r24507 until the compile errors can be fixed.

This commit was SVN r24541.

The following SVN revision numbers were found above:
  r24507 --> open-mpi/ompi@4ce1936fed5c6ce5b077d787e7dcee4c732ac82c
  r24533 --> open-mpi/ompi@3204af2d364056dc08060278251c6c2b90f14e07

											
										
										
											2011-03-18 16:33:02 +03:00
+								    uint32_t ib_path_rec_service_level;
-												 * MCA params btl_openib_use_eager_rdma can now override the
   INI file use_eager_rdma value (fixes trac:1169)
 * fixed a typo in a MCA param help message
 * made the check for enabling short/eager RDMA more robust in the
   presence of progress threads; it now emits a show_help warning

This commit was SVN r18723.

The following Trac tickets were found above:
  Ticket 1169 --> https://svn.open-mpi.org/trac/ompi/ticket/1169

											
										
										
											2008-06-24 22:31:46 +04:00
+								    int32_t use_eager_rdma;
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
+								    int32_t eager_rdma_threshold; /**< After this number of msg, use RDMA for short messages, always */
-												post_send() function is called without endpoint lock held from explicit credits
update function so eager_rdma_remote.head has to be updated in a thread-safe
manner.

This commit was SVN r15966.

											
										
										
											2007-08-27 15:37:01 +04:00
+								    int32_t eager_rdma_num;
-												Fix some signedness warnings on threaded builds introduced by r12369

This commit was SVN r12376.

The following SVN revision numbers were found above:
  r12369 --> open-mpi/ompi@d7375ec1024ea9337dd454b92f33e3294d7ed6b8

											
										
										
											2006-10-31 20:29:25 +03:00
+								    int32_t max_eager_rdma;
-												Support for LMC (lid mask count) and multiple QPs per port.

This commit was SVN r10536.

											
										
										
											2006-06-28 11:23:08 +04:00
+								    uint32_t btls_per_lid;
 								    uint32_t max_lmc;
-												Adding support for APM over different ports

This commit was SVN r17521.

											
										
										
											2008-02-20 16:44:05 +03:00
+								    int32_t apm_lmc;
 								    int32_t apm_ports;
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
+								    uint32_t buffer_alignment;    /**< Preferred communication buffer alignment in Bytes (must be power of two) */
-												This is a very large change to rename several #define values from
OMPI_* to OPAL_*.  This allows opal layer to be used more independent
from the whole of ompi.

NOTE: 9 "svn mv" operations immediately follow this commit.

This commit was SVN r21180.

											
										
										
											2009-05-07 00:11:28 +04:00
+								#if OPAL_HAVE_THREADS
-												Start setting a flag when a port error is detected on the openib BTL.
At this point, it is just cleared (and ignored) so default behavior has not changed.
However, future failover support can take advantage of this flag.
Reviewed by Pasha Shamis.

This commit was SVN r23204.

											
										
										
											2010-05-24 22:57:55 +04:00
+								    int32_t error_counter;           /**< Counts number on error events that we got on all devices */
-												This commit brings in two major things:

1. Galen's fine-grain control of queue pair resources in the openib
   BTL.
1. Pasha's new implementation of asynchronous HCA event handling.

Pasha's new implementation doesn't take much explanation, but the new
"multifrag" stuff does.  

Note that "svn merge" was not used to bring this new code from the
/tmp/ib_multifrag branch -- something Bad happened in the periodic
trunk pulls on that branch making an actual merge back to the trunk
effectively impossible (i.e., lots and lots of arbitrary conflicts and
artificial changes).  :-(

== Fine-grain control of queue pair resources ==

Galen's fine-grain control of queue pair resources to the OpenIB BTL
(thanks to Gleb for fixing broken code and providing additional
functionality, Pasha for finding broken code, and Jeff for doing all
the svn work and regression testing).

Prior to this commit, the OpenIB BTL created two queue pairs: one for
eager size fragments and one for max send size fragments.  When the
use of the shared receive queue (SRQ) was specified (via "-mca
btl_openib_use_srq 1"), these QPs would use a shared receive queue for
receive buffers instead of the default per-peer (PP) receive queues
and buffers.  One consequence of this design is that receive buffer
utilization (the size of the data received as a percentage of the
receive buffer used for the data) was quite poor for a number of
applications.

The new design allows multiple QPs to be specified at runtime.  Each
QP can be setup to use PP or SRQ receive buffers as well as giving
fine-grained control over receive buffer size, number of receive
buffers to post, when to replenish the receive queue (low water mark)
and for SRQ QPs, the number of outstanding sends can also be
specified.  The following is an example of the syntax to describe QPs
to the OpenIB BTL using the new MCA parameter btl_openib_receive_queues:

{{{
-mca btl_openib_receive_queues \
     "P,128,16,4;S,1024,256,128,32;S,4096,256,128,32;S,65536,256,128,32"
}}}

Each QP description is delimited by ";" (semicolon) with individual
fields of the QP description delimited by "," (comma).  The above
example therefore describes 4 QPs.

The first QP is:

    P,128,16,4

Meaning: per-peer receive buffer QPs are indicated by a starting field
of "P"; the first QP (shown above) is therefore a per-peer based QP.
The second field indicates the size of the receive buffer in bytes
(128 bytes).  The third field indicates the number of receive buffers
to allocate to the QP (16).  The fourth field indicates the low
watermark for receive buffers at which time the BTL will repost
receive buffers to the QP (4).

The second QP is:

    S,1024,256,128,32

Shared receive queue based QPs are indicated by a starting field of
"S"; the second QP (shown above) is therefore a shared receive queue
based QP.  The second, third and fourth fields are the same as in the
per-peer based QP.  The fifth field is the number of outstanding sends
that are allowed at a given time on the QP (32).  This provides a
"good enough" mechanism of flow control for some regular communication
patterns.

QPs MUST be specified in ascending receive buffer size order.  This
requirement may be removed prior to 1.3 release.

This commit was SVN r15474.

											
										
										
											2007-07-18 05:15:59 +04:00
+								    int async_pipe[2];               /**< Pipe for comunication with async event thread */
-												Fixing race condition between main thread and async event thread
during openib finalization.

This commit was SVN r18895.

											
										
										
											2008-07-13 20:21:49 +04:00
+								    int async_comp_pipe[2];          /**< Pipe for async thread comunication with main thread */
-												This commit brings in two major things:

1. Galen's fine-grain control of queue pair resources in the openib
   BTL.
1. Pasha's new implementation of asynchronous HCA event handling.

Pasha's new implementation doesn't take much explanation, but the new
"multifrag" stuff does.  

Note that "svn merge" was not used to bring this new code from the
/tmp/ib_multifrag branch -- something Bad happened in the periodic
trunk pulls on that branch making an actual merge back to the trunk
effectively impossible (i.e., lots and lots of arbitrary conflicts and
artificial changes).  :-(

== Fine-grain control of queue pair resources ==

Galen's fine-grain control of queue pair resources to the OpenIB BTL
(thanks to Gleb for fixing broken code and providing additional
functionality, Pasha for finding broken code, and Jeff for doing all
the svn work and regression testing).

Prior to this commit, the OpenIB BTL created two queue pairs: one for
eager size fragments and one for max send size fragments.  When the
use of the shared receive queue (SRQ) was specified (via "-mca
btl_openib_use_srq 1"), these QPs would use a shared receive queue for
receive buffers instead of the default per-peer (PP) receive queues
and buffers.  One consequence of this design is that receive buffer
utilization (the size of the data received as a percentage of the
receive buffer used for the data) was quite poor for a number of
applications.

The new design allows multiple QPs to be specified at runtime.  Each
QP can be setup to use PP or SRQ receive buffers as well as giving
fine-grained control over receive buffer size, number of receive
buffers to post, when to replenish the receive queue (low water mark)
and for SRQ QPs, the number of outstanding sends can also be
specified.  The following is an example of the syntax to describe QPs
to the OpenIB BTL using the new MCA parameter btl_openib_receive_queues:

{{{
-mca btl_openib_receive_queues \
     "P,128,16,4;S,1024,256,128,32;S,4096,256,128,32;S,65536,256,128,32"
}}}

Each QP description is delimited by ";" (semicolon) with individual
fields of the QP description delimited by "," (comma).  The above
example therefore describes 4 QPs.

The first QP is:

    P,128,16,4

Meaning: per-peer receive buffer QPs are indicated by a starting field
of "P"; the first QP (shown above) is therefore a per-peer based QP.
The second field indicates the size of the receive buffer in bytes
(128 bytes).  The third field indicates the number of receive buffers
to allocate to the QP (16).  The fourth field indicates the low
watermark for receive buffers at which time the BTL will repost
receive buffers to the QP (4).

The second QP is:

    S,1024,256,128,32

Shared receive queue based QPs are indicated by a starting field of
"S"; the second QP (shown above) is therefore a shared receive queue
based QP.  The second, third and fourth fields are the same as in the
per-peer based QP.  The fifth field is the number of outstanding sends
that are allowed at a given time on the QP (32).  This provides a
"good enough" mechanism of flow control for some regular communication
patterns.

QPs MUST be specified in ascending receive buffer size order.  This
requirement may be removed prior to 1.3 release.

This commit was SVN r15474.

											
										
										
											2007-07-18 05:15:59 +04:00
+								    pthread_t   async_thread;        /**< Async thread that will handle fatal errors */
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								    uint32_t use_async_event_thread; /**< Use the async event handler */
-												Adding a hash table for management dependences between SRQs and their BTL modules.

This commit was SVN r22653.

											
										
										
											2010-02-18 12:48:16 +03:00
+								    mca_btl_openib_srq_manager_t srq_manager;     /**< Hash table for all BTL SRQs */
-												Final changes from jsquyres review.  Moved configure
code from upper level into btl configure.m4.  Changed
prefix from "OMPI" to "BTL" in preprocessor macro.  Add
an mca param that shows it has been configured in.

This commit was SVN r24270.

											
										
										
											2011-01-19 23:58:22 +03:00
+								#if BTL_OPENIB_FAILOVER_ENABLED
-												Add support for openib BTL failover to be used with bfo PML. 
By default, feature is configured out so no effect on 
normal operation.

This commit was SVN r23412.

											
										
										
											2010-07-14 14:08:19 +04:00
+								    uint32_t port_error_failover;    /**< Report port errors to speed up failover */
 								#endif
-												Adding:
* openib_finalize flow for openib btl
* async event handler for openib btl

This commit was SVN r14623.

											
										
										
											2007-05-09 01:47:21 +04:00
+								#endif
-												Fixes trac:1210, #1319

Commit from a long-standing Mercurial tree that ended up incorporating a lot of things:

 * A few fixes for CPC interface changes in all the CPCs
 * Attempts (but not yet finished) to fix shutdown problems in the IB CM CPC
 * #1319: add CTS support (i.e., initiator guarantees to send first message; automatically activated for iWARP over the RDMA CM CPC)
   * Some variable and function renamings to make this be generic (e.g., alloc_credit_frag became alloc_control_frag)
   * CPCs no longer post receive buffers; they only post a single receive buffer for the CTS if they use CTS. Instead, the main BTL now posts the main sets of receive buffers. 
   * CPCs allocate a CTS buffer only if they're about to make a connection
 * RDMA CM improvements:
   * Use threaded mode openib fd monitoring to wait for RDMA CM events
   * Synchronize endpoint finalization and disconnection between main thread and service thread to avoid/fix some race conditions
   * Converted several structs to be OBJs so that we can use reference counting to know when to invoke destructors
   * Make some new OBJ's have opal_list_item_t's as their base, thereby eliminating the need for the local list_item_t type
   * Renamed many variables to be internally consistent
   * Centralize the decision in an inline function as to whether this process or the remote process is supposed to be the initiator
   * Add oodles of OPAL_OUTPUT statements for debugging (hard-wired to output stream -1; to be activated by developers if they want/need them) 
   * Use rdma_create_qp() instead of ibv_create_qp()
 * openib fd monitoring improvements:
   * Renamed a bunch of functions and variables to be a little more obvious as to their true function
   * Use pipes to communicate between main thread and service thread
   * Add ability for main thread to invoke a function back on the service thread 
   * Ensure to set initiator_depth and responder_resources properly, by putting max_qp_rd_atom and max_qp_init_rd_atom in the modex (see rdma_connect(3))
   * Ensure to set the source IP address in rdma_resolve() to ensure that we select the correct OpenFabrics source port
   * Make new MCA param: openib_btl_connect_rdmacm_resolve_timeout
 * Other improvements:
   * btl_openib_device_type MCA param: can be "iw" or "ib" or "all" (or "infiniband" or "iwarp")
   * Somewhat improved error handling
   * Bunches of spelling fixes in comments, VERBOSE, and OUTPUT statements
   * Oodles of little coding style fixes
   * Changed shutdown ordering of btl; the device is now an OBJ with ref counting for destruction
   * Added some more show_help error messages
   * Change configury to only build IBCM / RDMACM if we have threads (because we need a progress thread) 

This commit was SVN r19686.

The following Trac tickets were found above:
  Ticket 1210 --> https://svn.open-mpi.org/trac/ompi/ticket/1210

											
										
										
											2008-10-06 04:46:02 +04:00
+								    btl_openib_device_type_t device_type;
-												Bring over the functionality from the /tmp/jnysal-openib-wireup
branch:

 * Support btl_openib_if_include and btl_openib_if_exclude MCA
   parameters, similar to those supported by other BTLs.  Each take a
   comma-delimited lists of identifiers.  Identifiers can be HCA
   interface names (e.g., ipath0, mthca1, etc.)  or an HCA interface
   name and port numbers (e.g., ipath0:1, mthca1:2, etc.).  It is an
   error to specify both _include and _exclude.  If you specify a
   nonexistent (or non-ACTIVE) HCA and/or port, you'll get a warning
   unless you disable the warning by setting the MCA parameter
   btl_openib_warn_nonexistent_if to 0.
 * Start updating to use BEGIN_C_DECLS and END_C_DECLS
 * A few other minor fixes that were picked up along the way.

This commit was SVN r15063.

											
										
										
											2007-06-14 05:59:25 +04:00
+								    char *if_include;
 								    char **if_include_list;
 								    char *if_exclude;
 								    char **if_exclude_list;
-												This patch consists of two parts.  Part one is the fixing of a bug in the
determination of the IP subnet.  The netmask was being used improperly when
determining which subnet each connection is on.  Part two is the ability to
include/exclude specific subnets.

This patch fixes ticket #1665

This commit was SVN r20016.

											
										
										
											2008-11-17 23:20:24 +03:00
+								    char *ipaddr_include;
 								    char *ipaddr_exclude;
-												Fix up error handling in openib.. Added a simple debug test for memory
registration.. 

This commit was SVN r6520.

											
										
										
											2005-07-15 19:13:19 +04:00
-												Fixes trac:1285.  Really.

This commit has the same commit message as r18450, but without the
extra bonus memory corruption that was introduced.

This commit was SVN r18467.

The following SVN revision numbers were found above:
  r18450 --> open-mpi/ompi@5295902ebec36d49c92edcd3631978d822a107cc

The following Trac tickets were found above:
  Ticket 1285 --> https://svn.open-mpi.org/trac/ompi/ticket/1285

											
										
										
											2008-05-21 01:53:42 +04:00
+								    /* MCA param btl_openib_receive_queues */
 								    char *receive_queues;
 								    /* Whether we got a non-default value of btl_openib_receive_queues */
 								    btl_openib_receive_queues_source_t receive_queues_source;
-												Fixes trac:1295: change language in openib BTL from IB-specific to be
"!OpenFabrics" / neutral (i.e., refer to IB and/or iWARP).

 * Mostly just type, variable/field, and function name changes, such as
   s/hca/device/g, etc.  
 * Changed the INI file for the hardware-specific parameters to be
   mca-btl-openib-device-params.ini.
 * Updated a lot of help messages in the help-*.txt files, not just to
   update it to be !OpenFabrics/neutral language, but also for some
   consistency of tone, indenting, etc.
 * Deprecated a bunch of MCA params in favor of language-neutral new
   ones:
   * btl_openib_warn_no_hca_params_found (s/hca/device/)
   * btl_openib_hca_param_files
   * btl_openib_ib_cq_size (s/_ib_/_of_/)
   * btl_openib_ib_max_inline_data
   * btl_openib_ib_psn
   * btl_openib_ib_mtu
   * btl_openib_ib_pkey_ix
   * btl_openib_ib_pkey_val

This commit was SVN r18985.

The following Trac tickets were found above:
  Ticket 1295 --> https://svn.open-mpi.org/trac/ompi/ticket/1295

											
										
										
											2008-07-23 04:28:59 +04:00
+								    /** Colon-delimited list of filenames for device parameters */
 								    char *device_params_file_names;
-												Bring over all the work from the /tmp/ib-hw-detect branch.  In
addition to my design and testing, it was conceptually approved by
Gil, Gleb, Pasha, Brad, and Galen.  Functionally [probably somewhat
lightly] tested by Galen.  We may still have to shake out some bugs
during the next few months, but it seems to be working for all the
cases that I can throw at it.

Here's a summary of the changes from that branch: 

* Move MCA parameter registration to a new file (btl_openib_mca.c):
   * Properly check the return status of registering MCA params
   * Check for valid values of MCA parameters
   * Make help strings better
   * Otherwise, the only default value of an MCA param that was
     changed was max_btls; it went from 4 to -1 (meaning: use all
     available)
 * Properly prototyped internal functions in _component.c
   * Made a bunch of functions static that didn't need to be public
   * Renamed to remove "mca_" prefix from static functions
   * Call new MCA param registration function
   * Call new INI file read/lookup/finalize functions
   * Updated a bunch of macros to be "BTL_" instead of "ORTE_"
   * Be a little more consistent with return values
   * Handle -1 for the max_btls MCA param
   * Fixed a free() that should have been an OBJ_RELEASE()
   * Some re-indenting
 * Added INI-file parsing
   * New flex file: btl_openib_ini.l
   * New default HCA params .ini file (probably to be expanded over
     time by other HCA vendors)
   * Added more show_help messages for parsing problems
   * Read in INI files and cache the values for later lookup
   * When component opens an HCA, lookup to see if any corresponding
     values were found in the INI files (ID'ed by the HCA vendor_id
     and vendor_part_id)
   * Added btl_openib_verbose MCA param that shows what the INI-file
     stuff does (e.g., shows which MTU your HCA ends up using)
   * Added btl_openib_hca_param_files as a colon-delimited list of INI
     files to check for values during startup (in order,
     left-to-right, just like the MCA base directory param).
   * MTU is currently the only value supported in this framework.
   * It is not a fatal error if we don't find params for the HCA in
     the INI file(s).  Instead, just print a warning.  New MCA param
     btl_openib_warn_no_hca_params_found can be used to disable
     printing the warning.
 * Add MTU to peer negotiation when making a connection
   * Exchange maximum MTU; select the lesser of the two

This commit was SVN r11182.

											
										
										
											2006-08-14 23:30:37 +04:00
 								    /** Whether we're in verbose mode or not */
 								    bool verbose;
-												Fixes trac:1295: change language in openib BTL from IB-specific to be
"!OpenFabrics" / neutral (i.e., refer to IB and/or iWARP).

 * Mostly just type, variable/field, and function name changes, such as
   s/hca/device/g, etc.  
 * Changed the INI file for the hardware-specific parameters to be
   mca-btl-openib-device-params.ini.
 * Updated a lot of help messages in the help-*.txt files, not just to
   update it to be !OpenFabrics/neutral language, but also for some
   consistency of tone, indenting, etc.
 * Deprecated a bunch of MCA params in favor of language-neutral new
   ones:
   * btl_openib_warn_no_hca_params_found (s/hca/device/)
   * btl_openib_hca_param_files
   * btl_openib_ib_cq_size (s/_ib_/_of_/)
   * btl_openib_ib_max_inline_data
   * btl_openib_ib_psn
   * btl_openib_ib_mtu
   * btl_openib_ib_pkey_ix
   * btl_openib_ib_pkey_val

This commit was SVN r18985.

The following Trac tickets were found above:
  Ticket 1295 --> https://svn.open-mpi.org/trac/ompi/ticket/1295

											
										
										
											2008-07-23 04:28:59 +04:00
+								    /** Whether we want a warning if no device-specific parameters are
-												Bring over all the work from the /tmp/ib-hw-detect branch.  In
addition to my design and testing, it was conceptually approved by
Gil, Gleb, Pasha, Brad, and Galen.  Functionally [probably somewhat
lightly] tested by Galen.  We may still have to shake out some bugs
during the next few months, but it seems to be working for all the
cases that I can throw at it.

Here's a summary of the changes from that branch: 

* Move MCA parameter registration to a new file (btl_openib_mca.c):
   * Properly check the return status of registering MCA params
   * Check for valid values of MCA parameters
   * Make help strings better
   * Otherwise, the only default value of an MCA param that was
     changed was max_btls; it went from 4 to -1 (meaning: use all
     available)
 * Properly prototyped internal functions in _component.c
   * Made a bunch of functions static that didn't need to be public
   * Renamed to remove "mca_" prefix from static functions
   * Call new MCA param registration function
   * Call new INI file read/lookup/finalize functions
   * Updated a bunch of macros to be "BTL_" instead of "ORTE_"
   * Be a little more consistent with return values
   * Handle -1 for the max_btls MCA param
   * Fixed a free() that should have been an OBJ_RELEASE()
   * Some re-indenting
 * Added INI-file parsing
   * New flex file: btl_openib_ini.l
   * New default HCA params .ini file (probably to be expanded over
     time by other HCA vendors)
   * Added more show_help messages for parsing problems
   * Read in INI files and cache the values for later lookup
   * When component opens an HCA, lookup to see if any corresponding
     values were found in the INI files (ID'ed by the HCA vendor_id
     and vendor_part_id)
   * Added btl_openib_verbose MCA param that shows what the INI-file
     stuff does (e.g., shows which MTU your HCA ends up using)
   * Added btl_openib_hca_param_files as a colon-delimited list of INI
     files to check for values during startup (in order,
     left-to-right, just like the MCA base directory param).
   * MTU is currently the only value supported in this framework.
   * It is not a fatal error if we don't find params for the HCA in
     the INI file(s).  Instead, just print a warning.  New MCA param
     btl_openib_warn_no_hca_params_found can be used to disable
     printing the warning.
 * Add MTU to peer negotiation when making a connection
   * Exchange maximum MTU; select the lesser of the two

This commit was SVN r11182.

											
										
										
											2006-08-14 23:30:37 +04:00
+								        found in INI files */
-												Fixes trac:1295: change language in openib BTL from IB-specific to be
"!OpenFabrics" / neutral (i.e., refer to IB and/or iWARP).

 * Mostly just type, variable/field, and function name changes, such as
   s/hca/device/g, etc.  
 * Changed the INI file for the hardware-specific parameters to be
   mca-btl-openib-device-params.ini.
 * Updated a lot of help messages in the help-*.txt files, not just to
   update it to be !OpenFabrics/neutral language, but also for some
   consistency of tone, indenting, etc.
 * Deprecated a bunch of MCA params in favor of language-neutral new
   ones:
   * btl_openib_warn_no_hca_params_found (s/hca/device/)
   * btl_openib_hca_param_files
   * btl_openib_ib_cq_size (s/_ib_/_of_/)
   * btl_openib_ib_max_inline_data
   * btl_openib_ib_psn
   * btl_openib_ib_mtu
   * btl_openib_ib_pkey_ix
   * btl_openib_ib_pkey_val

This commit was SVN r18985.

The following Trac tickets were found above:
  Ticket 1295 --> https://svn.open-mpi.org/trac/ompi/ticket/1295

											
										
										
											2008-07-23 04:28:59 +04:00
+								    bool warn_no_device_params_found;
-												Local GID table contains not what I thought it contains. It contains local HCA
GIDs (there can be more than one) and not GIDs of the HCA on the network. Entry
zero always has to be initialized so we use it, and warn the user if there is more
than one port active and the default subnet is configured on at least one of them.

This commit was SVN r11815.

											
										
										
											2006-09-26 16:12:33 +04:00
+								    /** Whether we want a warning if non default GID prefix is not configured
 								        on multiport setup */
 								    bool warn_default_gid_prefix;
-												Bring over the functionality from the /tmp/jnysal-openib-wireup
branch:

 * Support btl_openib_if_include and btl_openib_if_exclude MCA
   parameters, similar to those supported by other BTLs.  Each take a
   comma-delimited lists of identifiers.  Identifiers can be HCA
   interface names (e.g., ipath0, mthca1, etc.)  or an HCA interface
   name and port numbers (e.g., ipath0:1, mthca1:2, etc.).  It is an
   error to specify both _include and _exclude.  If you specify a
   nonexistent (or non-ACTIVE) HCA and/or port, you'll get a warning
   unless you disable the warning by setting the MCA parameter
   btl_openib_warn_nonexistent_if to 0.
 * Start updating to use BEGIN_C_DECLS and END_C_DECLS
 * A few other minor fixes that were picked up along the way.

This commit was SVN r15063.

											
										
										
											2007-06-14 05:59:25 +04:00
+								    /** Whether we want a warning if the user specifies a non-existent
-												Fixes trac:1295: change language in openib BTL from IB-specific to be
"!OpenFabrics" / neutral (i.e., refer to IB and/or iWARP).

 * Mostly just type, variable/field, and function name changes, such as
   s/hca/device/g, etc.  
 * Changed the INI file for the hardware-specific parameters to be
   mca-btl-openib-device-params.ini.
 * Updated a lot of help messages in the help-*.txt files, not just to
   update it to be !OpenFabrics/neutral language, but also for some
   consistency of tone, indenting, etc.
 * Deprecated a bunch of MCA params in favor of language-neutral new
   ones:
   * btl_openib_warn_no_hca_params_found (s/hca/device/)
   * btl_openib_hca_param_files
   * btl_openib_ib_cq_size (s/_ib_/_of_/)
   * btl_openib_ib_max_inline_data
   * btl_openib_ib_psn
   * btl_openib_ib_mtu
   * btl_openib_ib_pkey_ix
   * btl_openib_ib_pkey_val

This commit was SVN r18985.

The following Trac tickets were found above:
  Ticket 1295 --> https://svn.open-mpi.org/trac/ompi/ticket/1295

											
										
										
											2008-07-23 04:28:59 +04:00
+								        device and/or port via btl_openib_if_[in|ex]clude MCA params */
-												Bring over the functionality from the /tmp/jnysal-openib-wireup
branch:

 * Support btl_openib_if_include and btl_openib_if_exclude MCA
   parameters, similar to those supported by other BTLs.  Each take a
   comma-delimited lists of identifiers.  Identifiers can be HCA
   interface names (e.g., ipath0, mthca1, etc.)  or an HCA interface
   name and port numbers (e.g., ipath0:1, mthca1:2, etc.).  It is an
   error to specify both _include and _exclude.  If you specify a
   nonexistent (or non-ACTIVE) HCA and/or port, you'll get a warning
   unless you disable the warning by setting the MCA parameter
   btl_openib_warn_nonexistent_if to 0.
 * Start updating to use BEGIN_C_DECLS and END_C_DECLS
 * A few other minor fixes that were picked up along the way.

This commit was SVN r15063.

											
										
										
											2007-06-14 05:59:25 +04:00
+								    bool warn_nonexistent_if;
 								    /** Dummy argv-style list; a copy of names from the
 								        if_[in|ex]clude list that we use for error checking (to ensure
 								        that they all exist) */
 								    char **if_list;
-												Message coalescing for openib BTL. If fragment is waiting to be transmitted in
a pending queue pack another message into it if there is enough space there.

This commit was SVN r16900.

											
										
										
											2007-12-09 17:05:13 +03:00
+								    bool use_message_coalescing;
-												OpenIB BTL has three channels through which data can be received (eager rdma,
high prio QPs and low prio QPs) and because not all of them are polled each time
progress() is called (to save on latency), starvation is possible. The commit
fixes this. Now each channel is polled, but higher priority channels are polled
more often. Three new parameters are introduced that control polling ratios 
between different channels.

This commit was SVN r17024.

											
										
										
											2007-12-23 15:29:34 +03:00
+								    uint32_t cq_poll_ratio;
 								    uint32_t cq_poll_progress;
 								    uint32_t eager_rdma_poll_ratio;
-												Merge the /tmp/jms-installdirs-trunk branch into the trunk.  This
finally brings in functionality that is already on the 1.2 branch, and
was developed and tested in the v1.2ofed branch (and other places).

Short version of new features:

 * Support for ibv_fork_init() 
 * Automatically fill in the openib BTL bandwidth value by 
   querying the HCA port 
 * Installdirs functionality 
 * Fixes to always use -I in the Fortran wrapper compilers (#924) 
 * Gleb's mpool updates 
 * Remove some kruft in btl/openib/configure.m4, therefore 
   fixing the harmless warnings noted in #665 
 * Bunches of updates to the Linux RPM spec file 

I.e., effectively the same thing that r14411 brought to the v1.2
branch.

Also effectively brought in r14432 and r14433 (some fixes on top of
the original r14411 commit to v1.2).  Still need to bring in the moral
equivalent of r14445 after this commit (fixes to installdirs).

This commit was SVN r14449.

The following SVN revision numbers were found above:
  r14411 --> open-mpi/ompi@83b31314ae25efa4a03dfc560b569322a18e3d23
  r14432 --> open-mpi/ompi@a48f160595abf35f5826e9370135a9056d4da486
  r14433 --> open-mpi/ompi@68f346d2bc482ae29df0cdfe82171dcdb28e4a01
  r14445 --> open-mpi/ompi@13d366b827719ec7078db17315513d5c6291ab96

											
										
										
											2007-04-21 04:15:05 +04:00
+								#ifdef HAVE_IBV_FORK_INIT
 								    /** Whether we want fork support or not */
 								    int want_fork_support;
 								#endif
-												This commit brings in two major things:

1. Galen's fine-grain control of queue pair resources in the openib
   BTL.
1. Pasha's new implementation of asynchronous HCA event handling.

Pasha's new implementation doesn't take much explanation, but the new
"multifrag" stuff does.  

Note that "svn merge" was not used to bring this new code from the
/tmp/ib_multifrag branch -- something Bad happened in the periodic
trunk pulls on that branch making an actual merge back to the trunk
effectively impossible (i.e., lots and lots of arbitrary conflicts and
artificial changes).  :-(

== Fine-grain control of queue pair resources ==

Galen's fine-grain control of queue pair resources to the OpenIB BTL
(thanks to Gleb for fixing broken code and providing additional
functionality, Pasha for finding broken code, and Jeff for doing all
the svn work and regression testing).

Prior to this commit, the OpenIB BTL created two queue pairs: one for
eager size fragments and one for max send size fragments.  When the
use of the shared receive queue (SRQ) was specified (via "-mca
btl_openib_use_srq 1"), these QPs would use a shared receive queue for
receive buffers instead of the default per-peer (PP) receive queues
and buffers.  One consequence of this design is that receive buffer
utilization (the size of the data received as a percentage of the
receive buffer used for the data) was quite poor for a number of
applications.

The new design allows multiple QPs to be specified at runtime.  Each
QP can be setup to use PP or SRQ receive buffers as well as giving
fine-grained control over receive buffer size, number of receive
buffers to post, when to replenish the receive queue (low water mark)
and for SRQ QPs, the number of outstanding sends can also be
specified.  The following is an example of the syntax to describe QPs
to the OpenIB BTL using the new MCA parameter btl_openib_receive_queues:

{{{
-mca btl_openib_receive_queues \
     "P,128,16,4;S,1024,256,128,32;S,4096,256,128,32;S,65536,256,128,32"
}}}

Each QP description is delimited by ";" (semicolon) with individual
fields of the QP description delimited by "," (comma).  The above
example therefore describes 4 QPs.

The first QP is:

    P,128,16,4

Meaning: per-peer receive buffer QPs are indicated by a starting field
of "P"; the first QP (shown above) is therefore a per-peer based QP.
The second field indicates the size of the receive buffer in bytes
(128 bytes).  The third field indicates the number of receive buffers
to allocate to the QP (16).  The fourth field indicates the low
watermark for receive buffers at which time the BTL will repost
receive buffers to the QP (4).

The second QP is:

    S,1024,256,128,32

Shared receive queue based QPs are indicated by a starting field of
"S"; the second QP (shown above) is therefore a shared receive queue
based QP.  The second, third and fourth fields are the same as in the
per-peer based QP.  The fifth field is the number of outstanding sends
that are allowed at a given time on the QP (32).  This provides a
"good enough" mechanism of flow control for some regular communication
patterns.

QPs MUST be specified in ascending receive buffer size order.  This
requirement may be removed prior to 1.3 release.

This commit was SVN r15474.

											
										
										
											2007-07-18 05:15:59 +04:00
+								    int rdma_qp;
-												Send all explicit credits for PP QPs of all orders over smallest PP qp.

This commit was SVN r16781.

											
										
										
											2007-11-28 10:13:34 +03:00
+								    int credits_qp; /* qp used for software flow control */
-												Bug fix for #1376.
If IBCM was explicitly specified with exclude/include parameter,
OpenIB BTL will enable verbose report for "/dev/infiniband/ucm" error,
other way the error will not be reported.

This commit was SVN r18868.

											
										
										
											2008-07-10 19:08:49 +04:00
+								    bool cpc_explicitly_defined;
-												Create free lists of fragments per HCA, not per BTL. Saves memory in case of
multiple LMCs.

This commit was SVN r17082.

											
										
										
											2008-01-09 13:26:21 +03:00
+								    /**< free list of frags only; used for pinning user memory */
 								    ompi_free_list_t send_user_free;
 								    /**< free list of frags only; used for pinning user memory */
 								    ompi_free_list_t recv_user_free;
 								    /**< frags for coalesced messages */
 								    ompi_free_list_t send_free_coalesced;
-												Improving support for non homogeneous OpenFabrics network configurations

This commit was SVN r22312.

											
										
										
											2009-12-15 17:25:07 +03:00
+								    /** Default receive queues */
 								    char* default_recv_qps;
-												Add btl_openib_gid_index MCA param to allow selecting which GID to use
from an openfabrics port's GID table.

This commit was SVN r24456.

											
										
										
											2011-02-24 17:09:22 +03:00
+								    /** GID index to use */
 								    int gid_index;
-												Adding support for on-demand SRQ pre-post (receive wqe allocation)

This commit was SVN r22313.

											
										
										
											2009-12-15 18:52:10 +03:00
+								    /** Whether we want a dynamically resizing srq, enabled by default */
 								    bool enable_srq_resize;
-												Final changes from jsquyres review.  Moved configure
code from upper level into btl configure.m4.  Changed
prefix from "OMPI" to "BTL" in preprocessor macro.  Add
an mca param that shows it has been configured in.

This commit was SVN r24270.

											
										
										
											2011-01-19 23:58:22 +03:00
+								#if BTL_OPENIB_FAILOVER_ENABLED
-												Add support for openib BTL failover to be used with bfo PML. 
By default, feature is configured out so no effect on 
normal operation.

This commit was SVN r23412.

											
										
										
											2010-07-14 14:08:19 +04:00
+								    int verbose_failover;
 								#endif
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								}; typedef struct mca_btl_openib_component_t mca_btl_openib_component_t;
-												The visibility flags (and/or Windows friendly export) is now on for all BTLs.

This commit was SVN r11662.

											
										
										
											2006-09-15 02:19:39 +04:00
+								OMPI_MODULE_DECLSPEC extern mca_btl_openib_component_t mca_btl_openib_component;
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								typedef mca_btl_base_recv_reg_t mca_btl_openib_recv_reg_t;
-												Merge in /tmp-public/cpc3 branch to trunk.  oob/xoob still remains the
default CPC.

This commit was SVN r18356.

											
										
										
											2008-05-02 15:52:33 +04:00
+								/**
 								 * Common information for all ports that is sent in the modex message
 								 */
 								typedef struct mca_btl_openib_modex_message_t {
 								    /** The subnet ID of this port */
-												call it what it is...
We are looking at subnet_ids and counting active ports per subnet.
Move the subnet count out of the procs loop; there is no need to do it there.

This commit was SVN r13105.

											
										
										
											2007-01-13 01:42:20 +03:00
+								    uint64_t subnet_id;
-												Merge in /tmp-public/cpc3 branch to trunk.  oob/xoob still remains the
default CPC.

This commit was SVN r18356.

											
										
										
											2008-05-02 15:52:33 +04:00
+								    /** LID of this port */
 								    uint16_t lid;
 								    /** APM LID for this port */
 								    uint16_t apm_lid;
 								    /** The MTU used by this port */
 								    uint8_t mtu;
-												Improving support for non homogeneous OpenFabrics network configurations

This commit was SVN r22312.

											
										
										
											2009-12-15 17:25:07 +03:00
+								    /** Vendor ID; defines device type and tuning */
 								    uint32_t vendor_id;
 								    /** Vendor part ID; defines device type and tuning */
 								    uint32_t vendor_part_id;
 								    /** Transport type of remote port */
 								    uint8_t transport_type;
-												Merge in /tmp-public/cpc3 branch to trunk.  oob/xoob still remains the
default CPC.

This commit was SVN r18356.

											
										
										
											2008-05-02 15:52:33 +04:00
+								    /** Dummy field used to calculate the real length */
 								    uint8_t end;
 								} mca_btl_openib_modex_message_t;
 								#define MCA_BTL_OPENIB_MODEX_MSG_NTOH(hdr)     \
-												heterogeneous fixes to the OpenIB BTL. This includes work by nysal, brian and
me.

This commit was SVN r13106.

											
										
										
											2007-01-13 02:14:45 +03:00
+								    do {                              \
-												Make the trunk openib btl compile again.

This commit was SVN r13110.

											
										
										
											2007-01-13 17:22:42 +03:00
+								        (hdr).subnet_id = ntoh64((hdr).subnet_id); \
-												Merge in /tmp-public/cpc3 branch to trunk.  oob/xoob still remains the
default CPC.

This commit was SVN r18356.

											
										
										
											2008-05-02 15:52:33 +04:00
+								        (hdr).lid = ntohs((hdr).lid); \
-												heterogeneous fixes to the OpenIB BTL. This includes work by nysal, brian and
me.

This commit was SVN r13106.

											
										
										
											2007-01-13 02:14:45 +03:00
+								    } while (0)
-												Merge in /tmp-public/cpc3 branch to trunk.  oob/xoob still remains the
default CPC.

This commit was SVN r18356.

											
										
										
											2008-05-02 15:52:33 +04:00
+								#define MCA_BTL_OPENIB_MODEX_MSG_HTON(hdr)     \
-												heterogeneous fixes to the OpenIB BTL. This includes work by nysal, brian and
me.

This commit was SVN r13106.

											
										
										
											2007-01-13 02:14:45 +03:00
+								    do {                              \
-												Make the trunk openib btl compile again.

This commit was SVN r13110.

											
										
										
											2007-01-13 17:22:42 +03:00
+								        (hdr).subnet_id = hton64((hdr).subnet_id); \
-												Merge in /tmp-public/cpc3 branch to trunk.  oob/xoob still remains the
default CPC.

This commit was SVN r18356.

											
										
										
											2008-05-02 15:52:33 +04:00
+								        (hdr).lid = htons((hdr).lid); \
-												heterogeneous fixes to the OpenIB BTL. This includes work by nysal, brian and
me.

This commit was SVN r13106.

											
										
										
											2007-01-13 02:14:45 +03:00
+								    } while (0)
-												Fixes trac:1295: change language in openib BTL from IB-specific to be
"!OpenFabrics" / neutral (i.e., refer to IB and/or iWARP).

 * Mostly just type, variable/field, and function name changes, such as
   s/hca/device/g, etc.  
 * Changed the INI file for the hardware-specific parameters to be
   mca-btl-openib-device-params.ini.
 * Updated a lot of help messages in the help-*.txt files, not just to
   update it to be !OpenFabrics/neutral language, but also for some
   consistency of tone, indenting, etc.
 * Deprecated a bunch of MCA params in favor of language-neutral new
   ones:
   * btl_openib_warn_no_hca_params_found (s/hca/device/)
   * btl_openib_hca_param_files
   * btl_openib_ib_cq_size (s/_ib_/_of_/)
   * btl_openib_ib_max_inline_data
   * btl_openib_ib_psn
   * btl_openib_ib_mtu
   * btl_openib_ib_pkey_ix
   * btl_openib_ib_pkey_val

This commit was SVN r18985.

The following Trac tickets were found above:
  Ticket 1295 --> https://svn.open-mpi.org/trac/ompi/ticket/1295

											
										
										
											2008-07-23 04:28:59 +04:00
+								typedef struct mca_btl_openib_device_qp_t {
-												Create free lists of fragments per HCA, not per BTL. Saves memory in case of
multiple LMCs.

This commit was SVN r17082.

											
										
										
											2008-01-09 13:26:21 +03:00
+								    ompi_free_list_t send_free;     /**< free lists of send buffer descriptors */
 								    ompi_free_list_t recv_free;     /**< free lists of receive buffer descriptors */
-												Fixes trac:1295: change language in openib BTL from IB-specific to be
"!OpenFabrics" / neutral (i.e., refer to IB and/or iWARP).

 * Mostly just type, variable/field, and function name changes, such as
   s/hca/device/g, etc.  
 * Changed the INI file for the hardware-specific parameters to be
   mca-btl-openib-device-params.ini.
 * Updated a lot of help messages in the help-*.txt files, not just to
   update it to be !OpenFabrics/neutral language, but also for some
   consistency of tone, indenting, etc.
 * Deprecated a bunch of MCA params in favor of language-neutral new
   ones:
   * btl_openib_warn_no_hca_params_found (s/hca/device/)
   * btl_openib_hca_param_files
   * btl_openib_ib_cq_size (s/_ib_/_of_/)
   * btl_openib_ib_max_inline_data
   * btl_openib_ib_psn
   * btl_openib_ib_mtu
   * btl_openib_ib_pkey_ix
   * btl_openib_ib_pkey_val

This commit was SVN r18985.

The following Trac tickets were found above:
  Ticket 1295 --> https://svn.open-mpi.org/trac/ompi/ticket/1295

											
										
										
											2008-07-23 04:28:59 +04:00
+								} mca_btl_openib_device_qp_t;
-												Create free lists of fragments per HCA, not per BTL. Saves memory in case of
multiple LMCs.

This commit was SVN r17082.

											
										
										
											2008-01-09 13:26:21 +03:00
-												OpenIB BTL has three channels through which data can be received (eager rdma,
high prio QPs and low prio QPs) and because not all of them are polled each time
progress() is called (to save on latency), starvation is possible. The commit
fixes this. Now each channel is polled, but higher priority channels are polled
more often. Three new parameters are introduced that control polling ratios 
between different channels.

This commit was SVN r17024.

											
										
										
											2007-12-23 15:29:34 +03:00
+								struct mca_btl_base_endpoint_t;
-												Fixes trac:1295: change language in openib BTL from IB-specific to be
"!OpenFabrics" / neutral (i.e., refer to IB and/or iWARP).

 * Mostly just type, variable/field, and function name changes, such as
   s/hca/device/g, etc.  
 * Changed the INI file for the hardware-specific parameters to be
   mca-btl-openib-device-params.ini.
 * Updated a lot of help messages in the help-*.txt files, not just to
   update it to be !OpenFabrics/neutral language, but also for some
   consistency of tone, indenting, etc.
 * Deprecated a bunch of MCA params in favor of language-neutral new
   ones:
   * btl_openib_warn_no_hca_params_found (s/hca/device/)
   * btl_openib_hca_param_files
   * btl_openib_ib_cq_size (s/_ib_/_of_/)
   * btl_openib_ib_max_inline_data
   * btl_openib_ib_psn
   * btl_openib_ib_mtu
   * btl_openib_ib_pkey_ix
   * btl_openib_ib_pkey_val

This commit was SVN r18985.

The following Trac tickets were found above:
  Ticket 1295 --> https://svn.open-mpi.org/trac/ompi/ticket/1295

											
										
										
											2008-07-23 04:28:59 +04:00
+								typedef struct mca_btl_openib_device_t {
-												OpenIB BTL has three channels through which data can be received (eager rdma,
high prio QPs and low prio QPs) and because not all of them are polled each time
progress() is called (to save on latency), starvation is possible. The commit
fixes this. Now each channel is polled, but higher priority channels are polled
more often. Three new parameters are introduced that control polling ratios 
between different channels.

This commit was SVN r17024.

											
										
										
											2007-12-23 15:29:34 +03:00
+								    opal_object_t super;
-												Support for LMC (lid mask count) and multiple QPs per port.

This commit was SVN r10536.

											
										
										
											2006-06-28 11:23:08 +04:00
+								    struct ibv_device *ib_dev;  /* the ib device */
-Update libevent to the 2.0 series, currently at 2.0.7rc. We will update to their final release when it becomes available. Currently known errors exist in unused portions of the libevent code. This revision passes the IBM test suite on a Linux machine and on a standalone Mac.

This is a fairly intrusive change, but outside of the moving of opal/event to opal/mca/event, the only changes involved (a) changing all calls to opal_event functions to reflect the new framework instead, and (b) ensuring that all opal_event_t objects are properly constructed since they are now true opal_objects.

Note: Shiqing has just returned from vacation and has not yet had a chance to complete the Windows integration. Thus, this commit almost certainly breaks Windows support on the trunk. However, I want this to have a chance to soak for as long as possible before I become less available a week from today (going to be at a class for 5 days, and thus will only be sparingly available) so we can find and fix any problems.

Biggest change is moving the libevent code from opal/event to a new opal/mca/event framework. This was done to make it much easier to update libevent in the future. New versions can be inserted as a new component and tested in parallel with the current version until validated, then we can remove the earlier version if we so choose. This is a statically built framework ala installdirs, so only one component will build at a time. There is no selection logic - the sole compiled component simply loads its function pointers into the opal_event struct.

I have gone thru the code base and converted all the libevent calls I could find. However, I cannot compile nor test every environment. It is therefore quite likely that errors remain in the system. Please keep an eye open for two things:

1. compile-time errors: these will be obvious as calls to the old functions (e.g., opal_evtimer_new) must be replaced by the new framework APIs (e.g., opal_event.evtimer_new)

2. run-time errors: these will likely show up as segfaults due to missing constructors on opal_event_t objects. It appears that it became a typical practice for people to "init" an opal_event_t by simply using memset to zero it out. This will no longer work - you must either OBJ_NEW or OBJ_CONSTRUCT an opal_event_t. I tried to catch these cases, but may have missed some. Believe me, you'll know when you hit it.

There is also the issue of the new libevent "no recursion" behavior. As I described in a recent email, we will have to discuss this and figure out what, if anything, we need to do.

This commit was SVN r23925.

											
										
										
											2010-10-24 22:35:54 +04:00
+								#if OMPI_ENABLE_PROGRESS_THREADS == 1
-												Fixes trac:1295: change language in openib BTL from IB-specific to be
"!OpenFabrics" / neutral (i.e., refer to IB and/or iWARP).

 * Mostly just type, variable/field, and function name changes, such as
   s/hca/device/g, etc.  
 * Changed the INI file for the hardware-specific parameters to be
   mca-btl-openib-device-params.ini.
 * Updated a lot of help messages in the help-*.txt files, not just to
   update it to be !OpenFabrics/neutral language, but also for some
   consistency of tone, indenting, etc.
 * Deprecated a bunch of MCA params in favor of language-neutral new
   ones:
   * btl_openib_warn_no_hca_params_found (s/hca/device/)
   * btl_openib_hca_param_files
   * btl_openib_ib_cq_size (s/_ib_/_of_/)
   * btl_openib_ib_max_inline_data
   * btl_openib_ib_psn
   * btl_openib_ib_mtu
   * btl_openib_ib_pkey_ix
   * btl_openib_ib_pkey_val

This commit was SVN r18985.

The following Trac tickets were found above:
  Ticket 1295 --> https://svn.open-mpi.org/trac/ompi/ticket/1295

											
										
										
											2008-07-23 04:28:59 +04:00
+								    struct ibv_comp_channel *ib_channel; /* Channel event for the device */
-												Adding progress thread support to OpenIB BTL.

Reviewed by Gleb.

This commit was SVN r12411.

											
										
										
											2006-11-02 19:15:21 +03:00
+								    opal_thread_t thread;                /* Progress thread */
 								    volatile bool progress;              /* Progress status */
 								#endif
-												Fixes trac:1295: change language in openib BTL from IB-specific to be
"!OpenFabrics" / neutral (i.e., refer to IB and/or iWARP).

 * Mostly just type, variable/field, and function name changes, such as
   s/hca/device/g, etc.  
 * Changed the INI file for the hardware-specific parameters to be
   mca-btl-openib-device-params.ini.
 * Updated a lot of help messages in the help-*.txt files, not just to
   update it to be !OpenFabrics/neutral language, but also for some
   consistency of tone, indenting, etc.
 * Deprecated a bunch of MCA params in favor of language-neutral new
   ones:
   * btl_openib_warn_no_hca_params_found (s/hca/device/)
   * btl_openib_hca_param_files
   * btl_openib_ib_cq_size (s/_ib_/_of_/)
   * btl_openib_ib_max_inline_data
   * btl_openib_ib_psn
   * btl_openib_ib_mtu
   * btl_openib_ib_pkey_ix
   * btl_openib_ib_pkey_val

This commit was SVN r18985.

The following Trac tickets were found above:
  Ticket 1295 --> https://svn.open-mpi.org/trac/ompi/ticket/1295

											
										
										
											2008-07-23 04:28:59 +04:00
+								    opal_mutex_t device_lock;          /* device level lock */
-												Support for LMC (lid mask count) and multiple QPs per port.

This commit was SVN r10536.

											
										
										
											2006-06-28 11:23:08 +04:00
+								    struct ibv_context *ib_dev_context;
 								    struct ibv_device_attr ib_dev_attr;
 								    struct ibv_pd *ib_pd;
-												Create only one CQ for all BTLs on the same HCA. Many BTLs can be created for
one HCA. Multiple ports, LMC, multiple BTLs per one LID. Having only one CQ for
all of them substantially reduces polling time.

This commit was SVN r15933.

											
										
										
											2007-08-20 16:28:25 +04:00
+								    struct ibv_cq *ib_cq[2];
 								    uint32_t cq_size[2];
-												Support for LMC (lid mask count) and multiple QPs per port.

This commit was SVN r10536.

											
										
										
											2006-06-28 11:23:08 +04:00
+								    mca_mpool_base_module_t *mpool;
-												Fixes trac:1295: change language in openib BTL from IB-specific to be
"!OpenFabrics" / neutral (i.e., refer to IB and/or iWARP).

 * Mostly just type, variable/field, and function name changes, such as
   s/hca/device/g, etc.  
 * Changed the INI file for the hardware-specific parameters to be
   mca-btl-openib-device-params.ini.
 * Updated a lot of help messages in the help-*.txt files, not just to
   update it to be !OpenFabrics/neutral language, but also for some
   consistency of tone, indenting, etc.
 * Deprecated a bunch of MCA params in favor of language-neutral new
   ones:
   * btl_openib_warn_no_hca_params_found (s/hca/device/)
   * btl_openib_hca_param_files
   * btl_openib_ib_cq_size (s/_ib_/_of_/)
   * btl_openib_ib_max_inline_data
   * btl_openib_ib_psn
   * btl_openib_ib_mtu
   * btl_openib_ib_pkey_ix
   * btl_openib_ib_pkey_val

This commit was SVN r18985.

The following Trac tickets were found above:
  Ticket 1295 --> https://svn.open-mpi.org/trac/ompi/ticket/1295

											
										
										
											2008-07-23 04:28:59 +04:00
+								    /* MTU for this device */
-												Bring over all the work from the /tmp/ib-hw-detect branch.  In
addition to my design and testing, it was conceptually approved by
Gil, Gleb, Pasha, Brad, and Galen.  Functionally [probably somewhat
lightly] tested by Galen.  We may still have to shake out some bugs
during the next few months, but it seems to be working for all the
cases that I can throw at it.

Here's a summary of the changes from that branch: 

* Move MCA parameter registration to a new file (btl_openib_mca.c):
   * Properly check the return status of registering MCA params
   * Check for valid values of MCA parameters
   * Make help strings better
   * Otherwise, the only default value of an MCA param that was
     changed was max_btls; it went from 4 to -1 (meaning: use all
     available)
 * Properly prototyped internal functions in _component.c
   * Made a bunch of functions static that didn't need to be public
   * Renamed to remove "mca_" prefix from static functions
   * Call new MCA param registration function
   * Call new INI file read/lookup/finalize functions
   * Updated a bunch of macros to be "BTL_" instead of "ORTE_"
   * Be a little more consistent with return values
   * Handle -1 for the max_btls MCA param
   * Fixed a free() that should have been an OBJ_RELEASE()
   * Some re-indenting
 * Added INI-file parsing
   * New flex file: btl_openib_ini.l
   * New default HCA params .ini file (probably to be expanded over
     time by other HCA vendors)
   * Added more show_help messages for parsing problems
   * Read in INI files and cache the values for later lookup
   * When component opens an HCA, lookup to see if any corresponding
     values were found in the INI files (ID'ed by the HCA vendor_id
     and vendor_part_id)
   * Added btl_openib_verbose MCA param that shows what the INI-file
     stuff does (e.g., shows which MTU your HCA ends up using)
   * Added btl_openib_hca_param_files as a colon-delimited list of INI
     files to check for values during startup (in order,
     left-to-right, just like the MCA base directory param).
   * MTU is currently the only value supported in this framework.
   * It is not a fatal error if we don't find params for the HCA in
     the INI file(s).  Instead, just print a warning.  New MCA param
     btl_openib_warn_no_hca_params_found can be used to disable
     printing the warning.
 * Add MTU to peer negotiation when making a connection
   * Exchange maximum MTU; select the lesser of the two

This commit was SVN r11182.

											
										
										
											2006-08-14 23:30:37 +04:00
+								    uint32_t mtu;
-												Fixes trac:1295: change language in openib BTL from IB-specific to be
"!OpenFabrics" / neutral (i.e., refer to IB and/or iWARP).

 * Mostly just type, variable/field, and function name changes, such as
   s/hca/device/g, etc.  
 * Changed the INI file for the hardware-specific parameters to be
   mca-btl-openib-device-params.ini.
 * Updated a lot of help messages in the help-*.txt files, not just to
   update it to be !OpenFabrics/neutral language, but also for some
   consistency of tone, indenting, etc.
 * Deprecated a bunch of MCA params in favor of language-neutral new
   ones:
   * btl_openib_warn_no_hca_params_found (s/hca/device/)
   * btl_openib_hca_param_files
   * btl_openib_ib_cq_size (s/_ib_/_of_/)
   * btl_openib_ib_max_inline_data
   * btl_openib_ib_psn
   * btl_openib_ib_mtu
   * btl_openib_ib_pkey_ix
   * btl_openib_ib_pkey_val

This commit was SVN r18985.

The following Trac tickets were found above:
  Ticket 1295 --> https://svn.open-mpi.org/trac/ompi/ticket/1295

											
										
										
											2008-07-23 04:28:59 +04:00
+								    /* Whether this device supports eager RDMA */
-												Fixes trac:366

Add ability for ini files to recognize "use_eager_rdma" flag.  Set the
default to "no" (because we should assume that HCAs cannot support the
property necessary for using RDMA for eager messages -- that the last
byte of the message is guaranteed to be written to memory last --
unless proven otherwise.  For example, iWARP cards apparently do not
provide this guarantee), and then set all Mellanox and IBM HCAs to
override the default to enable this behavior on these cards.

This commit was SVN r12851.

The following Trac tickets were found above:
  Ticket 366 --> https://svn.open-mpi.org/trac/ompi/ticket/366

											
										
										
											2006-12-14 18:52:13 +03:00
+								    uint8_t use_eager_rdma;
-												Fixes trac:1295: change language in openib BTL from IB-specific to be
"!OpenFabrics" / neutral (i.e., refer to IB and/or iWARP).

 * Mostly just type, variable/field, and function name changes, such as
   s/hca/device/g, etc.  
 * Changed the INI file for the hardware-specific parameters to be
   mca-btl-openib-device-params.ini.
 * Updated a lot of help messages in the help-*.txt files, not just to
   update it to be !OpenFabrics/neutral language, but also for some
   consistency of tone, indenting, etc.
 * Deprecated a bunch of MCA params in favor of language-neutral new
   ones:
   * btl_openib_warn_no_hca_params_found (s/hca/device/)
   * btl_openib_hca_param_files
   * btl_openib_ib_cq_size (s/_ib_/_of_/)
   * btl_openib_ib_max_inline_data
   * btl_openib_ib_psn
   * btl_openib_ib_mtu
   * btl_openib_ib_pkey_ix
   * btl_openib_ib_pkey_val

This commit was SVN r18985.

The following Trac tickets were found above:
  Ticket 1295 --> https://svn.open-mpi.org/trac/ompi/ticket/1295

											
										
										
											2008-07-23 04:28:59 +04:00
+								    uint8_t btls;              /** < number of btls using this device */
-												Replace the ompi_pointer_array with opal_pointer_array. The next step
(sometime after the merge with the ORTE branch), the opal_pointer_array
will become the only pointer_array implementation (the orte_pointer_array
will be removed).

This commit was SVN r17007.

											
										
										
											2007-12-21 09:02:00 +03:00
+								    opal_pointer_array_t *endpoints;
-												Fixes trac:1295: change language in openib BTL from IB-specific to be
"!OpenFabrics" / neutral (i.e., refer to IB and/or iWARP).

 * Mostly just type, variable/field, and function name changes, such as
   s/hca/device/g, etc.  
 * Changed the INI file for the hardware-specific parameters to be
   mca-btl-openib-device-params.ini.
 * Updated a lot of help messages in the help-*.txt files, not just to
   update it to be !OpenFabrics/neutral language, but also for some
   consistency of tone, indenting, etc.
 * Deprecated a bunch of MCA params in favor of language-neutral new
   ones:
   * btl_openib_warn_no_hca_params_found (s/hca/device/)
   * btl_openib_hca_param_files
   * btl_openib_ib_cq_size (s/_ib_/_of_/)
   * btl_openib_ib_max_inline_data
   * btl_openib_ib_psn
   * btl_openib_ib_mtu
   * btl_openib_ib_pkey_ix
   * btl_openib_ib_pkey_val

This commit was SVN r18985.

The following Trac tickets were found above:
  Ticket 1295 --> https://svn.open-mpi.org/trac/ompi/ticket/1295

											
										
										
											2008-07-23 04:28:59 +04:00
+								    opal_pointer_array_t *device_btls;
-												OpenIB BTL has three channels through which data can be received (eager rdma,
high prio QPs and low prio QPs) and because not all of them are polled each time
progress() is called (to save on latency), starvation is possible. The commit
fixes this. Now each channel is polled, but higher priority channels are polled
more often. Three new parameters are introduced that control polling ratios 
between different channels.

This commit was SVN r17024.

											
										
										
											2007-12-23 15:29:34 +03:00
+								    uint16_t hp_cq_polls;
 								    uint16_t eager_rdma_polls;
 								    bool pollme;
-												This is a very large change to rename several #define values from
OMPI_* to OPAL_*.  This allows opal layer to be used more independent
from the whole of ompi.

NOTE: 9 "svn mv" operations immediately follow this commit.

This commit was SVN r21180.

											
										
										
											2009-05-07 00:11:28 +04:00
+								#if OPAL_HAVE_THREADS
-												This commit brings in two major things:

1. Galen's fine-grain control of queue pair resources in the openib
   BTL.
1. Pasha's new implementation of asynchronous HCA event handling.

Pasha's new implementation doesn't take much explanation, but the new
"multifrag" stuff does.  

Note that "svn merge" was not used to bring this new code from the
/tmp/ib_multifrag branch -- something Bad happened in the periodic
trunk pulls on that branch making an actual merge back to the trunk
effectively impossible (i.e., lots and lots of arbitrary conflicts and
artificial changes).  :-(

== Fine-grain control of queue pair resources ==

Galen's fine-grain control of queue pair resources to the OpenIB BTL
(thanks to Gleb for fixing broken code and providing additional
functionality, Pasha for finding broken code, and Jeff for doing all
the svn work and regression testing).

Prior to this commit, the OpenIB BTL created two queue pairs: one for
eager size fragments and one for max send size fragments.  When the
use of the shared receive queue (SRQ) was specified (via "-mca
btl_openib_use_srq 1"), these QPs would use a shared receive queue for
receive buffers instead of the default per-peer (PP) receive queues
and buffers.  One consequence of this design is that receive buffer
utilization (the size of the data received as a percentage of the
receive buffer used for the data) was quite poor for a number of
applications.

The new design allows multiple QPs to be specified at runtime.  Each
QP can be setup to use PP or SRQ receive buffers as well as giving
fine-grained control over receive buffer size, number of receive
buffers to post, when to replenish the receive queue (low water mark)
and for SRQ QPs, the number of outstanding sends can also be
specified.  The following is an example of the syntax to describe QPs
to the OpenIB BTL using the new MCA parameter btl_openib_receive_queues:

{{{
-mca btl_openib_receive_queues \
     "P,128,16,4;S,1024,256,128,32;S,4096,256,128,32;S,65536,256,128,32"
}}}

Each QP description is delimited by ";" (semicolon) with individual
fields of the QP description delimited by "," (comma).  The above
example therefore describes 4 QPs.

The first QP is:

    P,128,16,4

Meaning: per-peer receive buffer QPs are indicated by a starting field
of "P"; the first QP (shown above) is therefore a per-peer based QP.
The second field indicates the size of the receive buffer in bytes
(128 bytes).  The third field indicates the number of receive buffers
to allocate to the QP (16).  The fourth field indicates the low
watermark for receive buffers at which time the BTL will repost
receive buffers to the QP (4).

The second QP is:

    S,1024,256,128,32

Shared receive queue based QPs are indicated by a starting field of
"S"; the second QP (shown above) is therefore a shared receive queue
based QP.  The second, third and fourth fields are the same as in the
per-peer based QP.  The fifth field is the number of outstanding sends
that are allowed at a given time on the QP (32).  This provides a
"good enough" mechanism of flow control for some regular communication
patterns.

QPs MUST be specified in ascending receive buffer size order.  This
requirement may be removed prior to 1.3 release.

This commit was SVN r15474.

											
										
										
											2007-07-18 05:15:59 +04:00
+								    volatile bool got_fatal_event;
-												Start setting a flag when a port error is detected on the openib BTL.
At this point, it is just cleared (and ignored) so default behavior has not changed.
However, future failover support can take advantage of this flag.
Reviewed by Pasha Shamis.

This commit was SVN r23204.

											
										
										
											2010-05-24 22:57:55 +04:00
+								    volatile bool got_port_event;
-												Adding:
* openib_finalize flow for openib btl
* async event handler for openib btl

This commit was SVN r14623.

											
										
										
											2007-05-09 01:47:21 +04:00
+								#endif
-												Initial XRC support by Mellanox.

This commit was SVN r16787.

											
										
										
											2007-11-28 10:18:59 +03:00
+								#if HAVE_XRC
 								    struct ibv_xrc_domain *xrc_domain;
 								    int xrc_fd;
 								#endif
-												Fix compilation warnings.

This commit was SVN r17169.

											
										
										
											2008-01-21 18:07:39 +03:00
+								    int32_t non_eager_rdma_endpoints;
-												OpenIB BTL has three channels through which data can be received (eager rdma,
high prio QPs and low prio QPs) and because not all of them are polled each time
progress() is called (to save on latency) starvation is possible. The commit
fixes this. Now each channel is polled, but higher priority channels are polled
more often. Three new parameters are introduced that control polling ratios 
between different channels.

This commit was SVN r17024.

											
										
										
											2007-12-23 15:29:34 +03:00
+								    int32_t eager_rdma_buffers_count;
 								    struct mca_btl_base_endpoint_t **eager_rdma_buffers;
-												Create free lists of fragments per HCA, not per BTL. Saves memory in case of
multiple LMCs.

This commit was SVN r17082.

											
										
										
											2008-01-09 13:26:21 +03:00
+								    /** frags for control messages */
 								    ompi_free_list_t send_free_control;
-												Fixes trac:1295: change language in openib BTL from IB-specific to be
"!OpenFabrics" / neutral (i.e., refer to IB and/or iWARP).

 * Mostly just type, variable/field, and function name changes, such as
   s/hca/device/g, etc.  
 * Changed the INI file for the hardware-specific parameters to be
   mca-btl-openib-device-params.ini.
 * Updated a lot of help messages in the help-*.txt files, not just to
   update it to be !OpenFabrics/neutral language, but also for some
   consistency of tone, indenting, etc.
 * Deprecated a bunch of MCA params in favor of language-neutral new
   ones:
   * btl_openib_warn_no_hca_params_found (s/hca/device/)
   * btl_openib_hca_param_files
   * btl_openib_ib_cq_size (s/_ib_/_of_/)
   * btl_openib_ib_max_inline_data
   * btl_openib_ib_psn
   * btl_openib_ib_mtu
   * btl_openib_ib_pkey_ix
   * btl_openib_ib_pkey_val

This commit was SVN r18985.

The following Trac tickets were found above:
  Ticket 1295 --> https://svn.open-mpi.org/trac/ompi/ticket/1295

											
										
										
											2008-07-23 04:28:59 +04:00
+								    /* QP types and attributes that will be used on this device */
 								    mca_btl_openib_device_qp_t *qps;
 								    /* Maximum value supported by this device for max_inline_data */
-												Fixes trac:1355: allow INI file to set max_inline_data value, and if not
specified, probe for max value supported by device.

This commit was SVN r18720.

The following Trac tickets were found above:
  Ticket 1355 --> https://svn.open-mpi.org/trac/ompi/ticket/1355

											
										
										
											2008-06-24 21:18:07 +04:00
+								    uint32_t max_inline_data;
-												Fixes trac:1295: change language in openib BTL from IB-specific to be
"!OpenFabrics" / neutral (i.e., refer to IB and/or iWARP).

 * Mostly just type, variable/field, and function name changes, such as
   s/hca/device/g, etc.  
 * Changed the INI file for the hardware-specific parameters to be
   mca-btl-openib-device-params.ini.
 * Updated a lot of help messages in the help-*.txt files, not just to
   update it to be !OpenFabrics/neutral language, but also for some
   consistency of tone, indenting, etc.
 * Deprecated a bunch of MCA params in favor of language-neutral new
   ones:
   * btl_openib_warn_no_hca_params_found (s/hca/device/)
   * btl_openib_hca_param_files
   * btl_openib_ib_cq_size (s/_ib_/_of_/)
   * btl_openib_ib_max_inline_data
   * btl_openib_ib_psn
   * btl_openib_ib_mtu
   * btl_openib_ib_pkey_ix
   * btl_openib_ib_pkey_val

This commit was SVN r18985.

The following Trac tickets were found above:
  Ticket 1295 --> https://svn.open-mpi.org/trac/ompi/ticket/1295

											
										
										
											2008-07-23 04:28:59 +04:00
+								} mca_btl_openib_device_t;
 								OBJ_CLASS_DECLARATION(mca_btl_openib_device_t);
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
-												This commit brings in two major things:

1. Galen's fine-grain control of queue pair resources in the openib
   BTL.
1. Pasha's new implementation of asynchronous HCA event handling.

Pasha's new implementation doesn't take much explanation, but the new
"multifrag" stuff does.  

Note that "svn merge" was not used to bring this new code from the
/tmp/ib_multifrag branch -- something Bad happened in the periodic
trunk pulls on that branch making an actual merge back to the trunk
effectively impossible (i.e., lots and lots of arbitrary conflicts and
artificial changes).  :-(

== Fine-grain control of queue pair resources ==

Galen's fine-grain control of queue pair resources to the OpenIB BTL
(thanks to Gleb for fixing broken code and providing additional
functionality, Pasha for finding broken code, and Jeff for doing all
the svn work and regression testing).

Prior to this commit, the OpenIB BTL created two queue pairs: one for
eager size fragments and one for max send size fragments.  When the
use of the shared receive queue (SRQ) was specified (via "-mca
btl_openib_use_srq 1"), these QPs would use a shared receive queue for
receive buffers instead of the default per-peer (PP) receive queues
and buffers.  One consequence of this design is that receive buffer
utilization (the size of the data received as a percentage of the
receive buffer used for the data) was quite poor for a number of
applications.

The new design allows multiple QPs to be specified at runtime.  Each
QP can be setup to use PP or SRQ receive buffers as well as giving
fine-grained control over receive buffer size, number of receive
buffers to post, when to replenish the receive queue (low water mark)
and for SRQ QPs, the number of outstanding sends can also be
specified.  The following is an example of the syntax to describe QPs
to the OpenIB BTL using the new MCA parameter btl_openib_receive_queues:

{{{
-mca btl_openib_receive_queues \
     "P,128,16,4;S,1024,256,128,32;S,4096,256,128,32;S,65536,256,128,32"
}}}

Each QP description is delimited by ";" (semicolon) with individual
fields of the QP description delimited by "," (comma).  The above
example therefore describes 4 QPs.

The first QP is:

    P,128,16,4

Meaning: per-peer receive buffer QPs are indicated by a starting field
of "P"; the first QP (shown above) is therefore a per-peer based QP.
The second field indicates the size of the receive buffer in bytes
(128 bytes).  The third field indicates the number of receive buffers
to allocate to the QP (16).  The fourth field indicates the low
watermark for receive buffers at which time the BTL will repost
receive buffers to the QP (4).

The second QP is:

    S,1024,256,128,32

Shared receive queue based QPs are indicated by a starting field of
"S"; the second QP (shown above) is therefore a shared receive queue
based QP.  The second, third and fourth fields are the same as in the
per-peer based QP.  The fifth field is the number of outstanding sends
that are allowed at a given time on the QP (32).  This provides a
"good enough" mechanism of flow control for some regular communication
patterns.

QPs MUST be specified in ascending receive buffer size order.  This
requirement may be removed prior to 1.3 release.

This commit was SVN r15474.

											
										
										
											2007-07-18 05:15:59 +04:00
+								struct mca_btl_openib_module_pp_qp_t {
 								    /* Sole member of this struct.
 								       NOTE(review): presumably a placeholder so that the per-peer (PP)
 								       variant is a non-empty struct -- confirm against its users. */
 								    int32_t dummy;
 								}; typedef struct mca_btl_openib_module_pp_qp_t mca_btl_openib_module_pp_qp_t;
 								struct mca_btl_openib_module_srq_qp_t {
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								    struct ibv_srq *srq;
 								    int32_t rd_posted;
 								    int32_t sd_credits;  /* the max number of outstanding sends on a QP when using SRQ */
 								                         /*  i.e. the number of frags that  can be outstanding (down counter) */
-												If there is an eager rdma credit, but there is no WQE to send a packet we add it
to a pending queue of eager rdma QP instead of correct pending list. This patch
fixes this by getting rid of the "eager rdma qp" notion. Packet is always sent
over its order QP. The patch also adds two pending queues for high and low prio
packets. Only high prio packets are sent over eager RDMA channel.

This commit was SVN r16780.

											
										
										
											2007-11-28 10:12:44 +03:00
+								    opal_list_t pending_frags[2];    /**< list of high/low prio frags */
-												Adding support for on-demand SRQ pre-post (receive wqe allocation)

This commit was SVN r22313.

											
										
										
											2009-12-15 18:52:10 +03:00
+								    /** The number of receive buffers that can be posted at the current time.
 								        The value may be increased in the IBV_EVENT_SRQ_LIMIT_REACHED
 								        event handler. The value starts from (rd_num / 4) and is increased up to rd_num */
 								    int32_t rd_curr_num;
 								    /** We post additional WQEs only if the number of WQEs (in a specific SRQ) is less than this value.
 								         The value is increased together with rd_curr_num. The value is unique for every SRQ. */
 								    int32_t rd_low_local;
 								    /** The flag indicates whether we want to get the
 								         IBV_EVENT_SRQ_LIMIT_REACHED events for dynamically resizing the SRQ */
 								    bool srq_limit_event_flag;
-												Jeff Squyres fixes

This commit was SVN r22319.

											
										
										
											2009-12-16 13:23:58 +03:00
+								    /**< Unlike the "--mca enable_srq_resize" parameter, which says whether we want (or not)
 								         to start with a small number of pre-posted receive buffers (rd_curr_num) and to increase this number as needed
 								         (the max of this value is rd_num - the whole size of the SRQ), the "srq_limit_event_flag" says whether we want to get a limit event
 								         from the device when the defined SRQ limit is reached (signal to the main thread); we turn this flag off once rd_curr_num
 								         has been increased up to rd_num.
 								         In order to avoid lock/unlock operations in the critical path, we prefer to only turn on
 								         the srq_limit_event_flag in the asynchronous thread, because this way we post receive buffers
 								         in the main thread only, and only after posting do we set (if srq_limit_event_flag is true)
 								         the limit for the IBV_EVENT_SRQ_LIMIT_REACHED event. */
-												This commit brings in two major things:

1. Galen's fine-grain control of queue pair resources in the openib
   BTL.
1. Pasha's new implementation of asynchronous HCA event handling.

Pasha's new implementation doesn't take much explanation, but the new
"multifrag" stuff does.  

Note that "svn merge" was not used to bring this new code from the
/tmp/ib_multifrag branch -- something Bad happened in the periodic
trunk pulls on that branch making an actual merge back to the trunk
effectively impossible (i.e., lots and lots of arbitrary conflicts and
artificial changes).  :-(

== Fine-grain control of queue pair resources ==

Galen's fine-grain control of queue pair resources to the OpenIB BTL
(thanks to Gleb for fixing broken code and providing additional
functionality, Pasha for finding broken code, and Jeff for doing all
the svn work and regression testing).

Prior to this commit, the OpenIB BTL created two queue pairs: one for
eager size fragments and one for max send size fragments.  When the
use of the shared receive queue (SRQ) was specified (via "-mca
btl_openib_use_srq 1"), these QPs would use a shared receive queue for
receive buffers instead of the default per-peer (PP) receive queues
and buffers.  One consequence of this design is that receive buffer
utilization (the size of the data received as a percentage of the
receive buffer used for the data) was quite poor for a number of
applications.

The new design allows multiple QPs to be specified at runtime.  Each
QP can be setup to use PP or SRQ receive buffers as well as giving
fine-grained control over receive buffer size, number of receive
buffers to post, when to replenish the receive queue (low water mark)
and for SRQ QPs, the number of outstanding sends can also be
specified.  The following is an example of the syntax to describe QPs
to the OpenIB BTL using the new MCA parameter btl_openib_receive_queues:

{{{
-mca btl_openib_receive_queues \
     "P,128,16,4;S,1024,256,128,32;S,4096,256,128,32;S,65536,256,128,32"
}}}

Each QP description is delimited by ";" (semicolon) with individual
fields of the QP description delimited by "," (comma).  The above
example therefore describes 4 QPs.

The first QP is:

    P,128,16,4

Meaning: per-peer receive buffer QPs are indicated by a starting field
of "P"; the first QP (shown above) is therefore a per-peer based QP.
The second field indicates the size of the receive buffer in bytes
(128 bytes).  The third field indicates the number of receive buffers
to allocate to the QP (16).  The fourth field indicates the low
watermark for receive buffers at which time the BTL will repost
receive buffers to the QP (4).

The second QP is:

    S,1024,256,128,32

Shared receive queue based QPs are indicated by a starting field of
"S"; the second QP (shown above) is therefore a shared receive queue
based QP.  The second, third and fourth fields are the same as in the
per-peer based QP.  The fifth field is the number of outstanding sends
that are allowed at a given time on the QP (32).  This provides a
"good enough" mechanism of flow control for some regular communication
patterns.

QPs MUST be specified in ascending receive buffer size order.  This
requirement may be removed prior to 1.3 release.

This commit was SVN r15474.

											
										
										
											2007-07-18 05:15:59 +04:00
+								}; typedef struct mca_btl_openib_module_srq_qp_t mca_btl_openib_module_srq_qp_t;
 								/**
 								 * Wrapper around a union that stores either a
 								 * mca_btl_openib_module_pp_qp_t (u.pp_qp) or a
 								 * mca_btl_openib_module_srq_qp_t (u.srq_qp) in the same storage.
 								 * NOTE(review): nothing in this struct records which union member is
 								 * active; presumably that is determined elsewhere -- confirm.
 								 */
 								struct mca_btl_openib_module_qp_t {
 								    union {
 								        mca_btl_openib_module_pp_qp_t pp_qp;
 								        mca_btl_openib_module_srq_qp_t srq_qp;
 								    } u;
 								}; typedef struct mca_btl_openib_module_qp_t mca_btl_openib_module_qp_t;
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								/**
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
+								 * IB BTL Interface
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								 */
 								struct mca_btl_openib_module_t {
-												Merge in /tmp-public/cpc3 branch to trunk.  oob/xoob still remains the
default CPC.

This commit was SVN r18356.

											
										
										
											2008-05-02 15:52:33 +04:00
+								    /* Base BTL module */
 								    mca_btl_base_module_t  super;
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								    bool btl_inited;
-												Merge in /tmp-public/cpc3 branch to trunk.  oob/xoob still remains the
default CPC.

This commit was SVN r18356.

											
										
										
											2008-05-02 15:52:33 +04:00
 								    /** Common information about all ports */
 								    mca_btl_openib_modex_message_t port_info;
 								    /** Array of CPCs on this port */
 								    ompi_btl_openib_connect_base_module_t **cpcs;
 								    /** Number of elements in the cpcs array */
 								    uint8_t num_cpcs;
-												Fixes trac:1295: change language in openib BTL from IB-specific to be
"!OpenFabrics" / neutral (i.e., refer to IB and/or iWARP).

 * Mostly just type, variable/field, and function name changes, such as
   s/hca/device/g, etc.  
 * Changed the INI file for the hardware-specific parameters to be
   mca-btl-openib-device-params.ini.
 * Updated a lot of help messages in the help-*.txt files, not just to
   update it to be !OpenFabrics/neutral language, but also for some
   consistency of tone, indenting, etc.
 * Deprecated a bunch of MCA params in favor of language-neutral new
   ones:
   * btl_openib_warn_no_hca_params_found (s/hca/device/)
   * btl_openib_hca_param_files
   * btl_openib_ib_cq_size (s/_ib_/_of_/)
   * btl_openib_ib_max_inline_data
   * btl_openib_ib_psn
   * btl_openib_ib_mtu
   * btl_openib_ib_pkey_ix
   * btl_openib_ib_pkey_val

This commit was SVN r18985.

The following Trac tickets were found above:
  Ticket 1295 --> https://svn.open-mpi.org/trac/ompi/ticket/1295

											
										
										
											2008-07-23 04:28:59 +04:00
+								    mca_btl_openib_device_t *device;
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								    uint8_t port_num;                  /**< ID of the PORT */
-												Add pkey value MCA parameter. If this param is used,
only ports with the actual pkey value will be initialized.

This commit was SVN r14463.

											
										
										
											2007-04-22 14:22:12 +04:00
+								    uint16_t pkey_index;
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								    struct ibv_port_attr ib_port_attr;
-												Support for LMC (lid mask count) and multiple QPs per port.

This commit was SVN r10536.

											
										
										
											2006-06-28 11:23:08 +04:00
+								    uint16_t lid;                      /**< lid that is actually used (for LMC) */
-												Adding support for APM over different ports

This commit was SVN r17521.

											
										
										
											2008-02-20 16:44:05 +03:00
+								    int apm_port;                      /**< Alternative port that may be used for APM */
-												Support for LMC (lid mask count) and multiple QPs per port.

This commit was SVN r10536.

											
										
										
											2006-06-28 11:23:08 +04:00
+								    uint8_t src_path_bits;             /**< offset from base lid (for LMC) */
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
-												This commit brings in two major things:

1. Galen's fine-grain control of queue pair resources in the openib
   BTL.
1. Pasha's new implementation of asynchronous HCA event handling.

Pasha's new implementation doesn't take much explanation, but the new
"multifrag" stuff does.  

Note that "svn merge" was not used to bring this new code from the
/tmp/ib_multifrag branch -- something Bad happened in the periodic
trunk pulls on that branch making an actual merge back to the trunk
effectively impossible (i.e., lots and lots of arbitrary conflicts and
artificial changes).  :-(

== Fine-grain control of queue pair resources ==

Galen's fine-grain control of queue pair resources to the OpenIB BTL
(thanks to Gleb for fixing broken code and providing additional
functionality, Pasha for finding broken code, and Jeff for doing all
the svn work and regression testing).

Prior to this commit, the OpenIB BTL created two queue pairs: one for
eager size fragments and one for max send size fragments.  When the
use of the shared receive queue (SRQ) was specified (via "-mca
btl_openib_use_srq 1"), these QPs would use a shared receive queue for
receive buffers instead of the default per-peer (PP) receive queues
and buffers.  One consequence of this design is that receive buffer
utilization (the size of the data received as a percentage of the
receive buffer used for the data) was quite poor for a number of
applications.

The new design allows multiple QPs to be specified at runtime.  Each
QP can be setup to use PP or SRQ receive buffers as well as giving
fine-grained control over receive buffer size, number of receive
buffers to post, when to replenish the receive queue (low water mark)
and for SRQ QPs, the number of outstanding sends can also be
specified.  The following is an example of the syntax to describe QPs
to the OpenIB BTL using the new MCA parameter btl_openib_receive_queues:

{{{
-mca btl_openib_receive_queues \
     "P,128,16,4;S,1024,256,128,32;S,4096,256,128,32;S,65536,256,128,32"
}}}

Each QP description is delimited by ";" (semicolon) with individual
fields of the QP description delimited by "," (comma).  The above
example therefore describes 4 QPs.

The first QP is:

    P,128,16,4

Meaning: per-peer receive buffer QPs are indicated by a starting field
of "P"; the first QP (shown above) is therefore a per-peer based QP.
The second field indicates the size of the receive buffer in bytes
(128 bytes).  The third field indicates the number of receive buffers
to allocate to the QP (16).  The fourth field indicates the low
watermark for receive buffers at which time the BTL will repost
receive buffers to the QP (4).

The second QP is:

    S,1024,256,128,32

Shared receive queue based QPs are indicated by a starting field of
"S"; the second QP (shown above) is therefore a shared receive queue
based QP.  The second, third and fourth fields are the same as in the
per-peer based QP.  The fifth field is the number of outstanding sends
that are allowed at a given time on the QP (32).  This provides a
"good enough" mechanism of flow control for some regular communication
patterns.

QPs MUST be specified in ascending receive buffer size order.  This
requirement may be removed prior to 1.3 release.

This commit was SVN r15474.

											
										
										
											2007-07-18 05:15:59 +04:00
+								    int32_t num_peers;
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
 								    opal_mutex_t ib_lock;              /**< module level lock */
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
+								    size_t eager_rdma_frag_size;                /**< length of eager frag */
-												OpenIB BTL has three channels through which data can be received (eager rdma,
high prio QPs and low prio QPs) and because not all of them are polled each time
progress() is called (to save on latency) starvation is possible. The commit
fixes this. Now each channel is polled, but higher priority channels are polled
more often. Three new parameters are introduced that control polling ratios 
between different channels.

This commit was SVN r17024.

											
										
										
											2007-12-23 15:29:34 +03:00
+								    volatile int32_t eager_rdma_channels;  /**< number of open RDMA channels */
-												Add error callback to the btl interface, this allows error to be delivered to
the upper layer asynchronously although there are some issues with this.. such
as there are multiple consumers of the btl's.. who gets the

This commit was SVN r11232.

											
										
										
											2006-08-17 00:21:38 +04:00
 								    mca_btl_base_module_error_cb_fn_t error_cb; /**< error handler */
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
-												This commit brings in two major things:

1. Galen's fine-grain control of queue pair resources in the openib
   BTL.
1. Pasha's new implementation of asynchronous HCA event handling.

Pasha's new implementation doesn't take much explanation, but the new
"multifrag" stuff does.  

Note that "svn merge" was not used to bring this new code from the
/tmp/ib_multifrag branch -- something Bad happened in the periodic
trunk pulls on that branch making an actual merge back to the trunk
effectively impossible (i.e., lots and lots of arbitrary conflicts and
artificial changes).  :-(

== Fine-grain control of queue pair resources ==

Galen's fine-grain control of queue pair resources to the OpenIB BTL
(thanks to Gleb for fixing broken code and providing additional
functionality, Pasha for finding broken code, and Jeff for doing all
the svn work and regression testing).

Prior to this commit, the OpenIB BTL created two queue pairs: one for
eager size fragments and one for max send size fragments.  When the
use of the shared receive queue (SRQ) was specified (via "-mca
btl_openib_use_srq 1"), these QPs would use a shared receive queue for
receive buffers instead of the default per-peer (PP) receive queues
and buffers.  One consequence of this design is that receive buffer
utilization (the size of the data received as a percentage of the
receive buffer used for the data) was quite poor for a number of
applications.

The new design allows multiple QPs to be specified at runtime.  Each
QP can be setup to use PP or SRQ receive buffers as well as giving
fine-grained control over receive buffer size, number of receive
buffers to post, when to replenish the receive queue (low water mark)
and for SRQ QPs, the number of outstanding sends can also be
specified.  The following is an example of the syntax to describe QPs
to the OpenIB BTL using the new MCA parameter btl_openib_receive_queues:

{{{
-mca btl_openib_receive_queues \
     "P,128,16,4;S,1024,256,128,32;S,4096,256,128,32;S,65536,256,128,32"
}}}

Each QP description is delimited by ";" (semicolon) with individual
fields of the QP description delimited by "," (comma).  The above
example therefore describes 4 QPs.

The first QP is:

    P,128,16,4

Meaning: per-peer receive buffer QPs are indicated by a starting field
of "P"; the first QP (shown above) is therefore a per-peer based QP.
The second field indicates the size of the receive buffer in bytes
(128 bytes).  The third field indicates the number of receive buffers
to allocate to the QP (16).  The fourth field indicates the low
watermark for receive buffers at which time the BTL will repost
receive buffers to the QP (4).

The second QP is:

    S,1024,256,128,32

Shared receive queue based QPs are indicated by a starting field of
"S"; the second QP (shown above) is therefore a shared receive queue
based QP.  The second, third and fourth fields are the same as in the
per-peer based QP.  The fifth field is the number of outstanding sends
that are allowed at a given time on the QP (32).  This provides a
"good enough" mechanism of flow control for some regular communication
patterns.

QPs MUST be specified in ascending receive buffer size order.  This
requirement may be removed prior to 1.3 release.

This commit was SVN r15474.

											
										
										
											2007-07-18 05:15:59 +04:00
+								    mca_btl_openib_module_qp_t * qps;
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
+								};
 								typedef struct mca_btl_openib_module_t mca_btl_openib_module_t;
-												Merge with gleb-mpool branch. All RDMA components use same mpool now (rdma).
udapl/openib/vapi/gm mpools are deprecated. rdma mpool has parameter that allows
to limit its size mpool_rdma_rcache_size_limit (default is 0 - unlimited).

This commit was SVN r12878.

											
										
										
											2006-12-17 15:26:41 +03:00
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								extern mca_btl_openib_module_t mca_btl_openib_module;
-												Merge with gleb-mpool branch. All RDMA components use same mpool now (rdma).
udapl/openib/vapi/gm mpools are deprecated. rdma mpool has parameter that allows
to limit its size mpool_rdma_rcache_size_limit (default is 0 - unlimited).

This commit was SVN r12878.

											
										
										
											2006-12-17 15:26:41 +03:00
+								struct mca_btl_openib_reg_t {
 								    mca_mpool_base_registration_t base; /**< generic mpool registration record (first member; presumably so this struct can stand in for the base type -- TODO confirm) */
 								    struct ibv_mr *mr; /**< verbs memory region (struct ibv_mr) associated with this registration */
 								};
 								typedef struct mca_btl_openib_reg_t mca_btl_openib_reg_t;
-Update libevent to the 2.0 series, currently at 2.0.7rc. We will update to their final release when it becomes available. Currently known errors exist in unused portions of the libevent code. This revision passes the IBM test suite on a Linux machine and on a standalone Mac.

This is a fairly intrusive change, but outside of the moving of opal/event to opal/mca/event, the only changes involved (a) changing all calls to opal_event functions to reflect the new framework instead, and (b) ensuring that all opal_event_t objects are properly constructed since they are now true opal_objects.

Note: Shiqing has just returned from vacation and has not yet had a chance to complete the Windows integration. Thus, this commit almost certainly breaks Windows support on the trunk. However, I want this to have a chance to soak for as long as possible before I become less available a week from today (going to be at a class for 5 days, and thus will only be sparingly available) so we can find and fix any problems.

Biggest change is moving the libevent code from opal/event to a new opal/mca/event framework. This was done to make it much easier to update libevent in the future. New versions can be inserted as a new component and tested in parallel with the current version until validated, then we can remove the earlier version if we so choose. This is a statically built framework ala installdirs, so only one component will build at a time. There is no selection logic - the sole compiled component simply loads its function pointers into the opal_event struct.

I have gone thru the code base and converted all the libevent calls I could find. However, I cannot compile nor test every environment. It is therefore quite likely that errors remain in the system. Please keep an eye open for two things:

1. compile-time errors: these will be obvious as calls to the old functions (e.g., opal_evtimer_new) must be replaced by the new framework APIs (e.g., opal_event.evtimer_new)

2. run-time errors: these will likely show up as segfaults due to missing constructors on opal_event_t objects. It appears that it became a typical practice for people to "init" an opal_event_t by simply using memset to zero it out. This will no longer work - you must either OBJ_NEW or OBJ_CONSTRUCT an opal_event_t. I tried to catch these cases, but may have missed some. Believe me, you'll know when you hit it.

There is also the issue of the new libevent "no recursion" behavior. As I described on a recent email, we will have to discuss this and figure out what, if anything, we need to do.

This commit was SVN r23925.

											
										
										
											2010-10-24 22:35:54 +04:00
+								#if OMPI_ENABLE_PROGRESS_THREADS == 1
-												Adding progress thread support to OpenIB BTL.

Reviewed by Gleb.

This commit was SVN r12411.

											
										
										
											2006-11-02 19:15:21 +03:00
+								extern void* mca_btl_openib_progress_thread(opal_object_t*);
 								#endif
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
-												Add error callback to the btl interface, this allows error to be delivered to
the upper layer asynchronously, although there are some issues with this, such
as there being multiple consumers of the BTLs... who gets the

This commit was SVN r11232.

											
										
										
											2006-08-17 00:21:38 +04:00
 								/**
 								 * Register a callback function that is called on error.
 								 *
 								 * @param btl (IN)     BTL module
 								 * @param cbfunc (IN)  Error callback to register (presumably stored in the
 								 *                     module's error_cb field -- confirm against the
 								 *                     implementation)
 								 * @return             Status code. NOTE(review): the previous text here read
 								 *                     "Status indicating if cleanup was successful", which
 								 *                     looks copied from a cleanup routine; confirm the
 								 *                     actual return values against the implementation.
 								 */
 								int mca_btl_openib_register_error_cb(
 								    struct mca_btl_base_module_t* btl,
 								    mca_btl_base_module_error_cb_fn_t cbfunc
 								);
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
 								/**
 								 * Cleanup any resources held by the BTL.
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								 *
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								 * @param btl  BTL instance.
 								 * @return     OMPI_SUCCESS or error status on failure.
 								 */
 								extern int mca_btl_openib_finalize(
 								    struct mca_btl_base_module_t* btl
 								);
 								/**
 								 * PML->BTL notification of change in the process list.
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								 *
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
+								 * @param btl (IN)            BTL module
 								 * @param nprocs (IN)         Number of processes
 								 * @param procs (IN)          Set of processes
 								 * @param peers (OUT)         Set of (optional) peer addressing info.
 								 * @param reachable (IN/OUT)  Set of processes that are reachable via this BTL.
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								 * @return     OMPI_SUCCESS or error status on failure.
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								 *
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								 */
 								extern int mca_btl_openib_add_procs(
 								    struct mca_btl_base_module_t* btl,
 								    size_t nprocs,
 								    struct ompi_proc_t **procs,
 								    struct mca_btl_base_endpoint_t** peers,
-												 - As discussed on RFC, move the ompi_bitmap to the
   opal layer.
   Add a check against a maximum (actually get rid of ifs internally to
   opal_bitmap.c) -- the functionality to set the current maximum size
   opal_bitmap_set_max_size() is currently only used in attribute.c
   to set the maximum OMPI_FORTRAN_HANDLE_MAX...

   Tested on linux/x86-64 with intel-tests with all_tests_no_perf_f
   run with 6 procs.
   Let's look into MTT as well...

This commit was SVN r20708.

											
										
										
											2009-03-04 01:25:13 +03:00
+								    opal_bitmap_t* reachable
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								);
 								/**
 								 * PML->BTL notification of change in the process list.
 								 * (Presumably the removal counterpart of mca_btl_openib_add_procs --
 								 * confirm against the implementation.)
 								 *
 								 * @param btl (IN)     BTL instance
 								 * @param nprocs (IN)  Number of processes.
 								 * @param procs (IN)   Set of processes.
 								 * @param peers (IN)   Set of peer data structures.
 								 * @return             Status indicating if cleanup was successful
 								 *
 								 */
 								extern int mca_btl_openib_del_procs(
 								    struct mca_btl_base_module_t* btl,
 								    size_t nprocs,
 								    struct ompi_proc_t **procs,
 								    struct mca_btl_base_endpoint_t** peers
 								);
 								/**
 								 * PML->BTL Initiate a send of the specified size.
 								 *
 								 * @param btl (IN)               BTL instance
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
+								 * @param btl_peer (IN)          BTL peer addressing
 								 * @param descriptor (IN)        Descriptor of data to be transmitted.
 								 * @param tag (IN)               Tag.
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								 */
 								extern int mca_btl_openib_send(
 								    struct mca_btl_base_module_t* btl,
 								    struct mca_btl_base_endpoint_t* btl_peer,
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								    struct mca_btl_base_descriptor_t* descriptor,
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								    mca_btl_base_tag_t tag
 								);
-												Adding send_immediate (sendi) implementation to openib btl.

This commit was SVN r20881.

											
										
										
											2009-03-25 19:53:26 +03:00
+								/**
 								 * PML->BTL Initiate an immediate send of the specified size.
 								 *
 								 * @param btl (IN)               BTL instance
 								 * @param ep (IN)                Endpoint
 								 * @param convertor (IN)         Datatypes converter
 								 * @param header (IN)            PML header
 								 * @param header_size (IN)       PML header size
 								 * @param payload_size (IN)      Payload size
 								 * @param order (IN)             Order
 								 * @param flags (IN)             Flags
 								 * @param tag (IN)               Tag
 								 * @param descriptor (OUT)       Messages descriptor
 								 */
 								extern int mca_btl_openib_sendi( struct mca_btl_base_module_t* btl,
 								    struct mca_btl_base_endpoint_t* ep,
- - Split the datatype engine into two parts: an MPI specific part in
   OMPI
   and a language agnostic part in OPAL. The convertor is completely
   moved into OPAL.  This offers several benefits as described in RFC
   http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
   namely:
    - Fewer basic types (int* and float* types, boolean and wchar
    - Fixing naming scheme to ompi-nomenclature.
    - Usability outside of the ompi-layer.
 - Due to the fixed nature of simple opal types, their information is
   completely
   known at compile time and therefore constified
 - With fewer datatypes (22), the actual sizes of bit-field types may be
   reduced
   from 64 to 32 bits, allowing reorganizing the opal_datatype
   structure, eliminating holes and keeping data required in convertor
   (upon send/recv) in one cacheline...
   This has implications to the convertor-datastructure and other parts
   of the code.
 - Several performance tests have been run, the netpipe latency does not
   change with
   this patch on Linux/x86-64 on the smoky cluster.
 - Extensive tests have been done to verify correctness (no new
   regressions) using:
   1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
    ompi-ddt:
    a. running both trunk and ompi-ddt resulted in no differences
       (except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
       correctly).
    b. with --enable-memchecker and running under valgrind (one buglet
       when run with static found in test-suite, committed)
   2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
      all passed (except for the dynamic/ tests failed!! as trunk/MTT)
   3. compilation and usage of HDF5 tests on Jaguar using PGI and
      PathScale compilers.
   4. compilation and usage on Scicortex.
 - Please note, that for the heterogeneous case, (-m32 compiled
   binaries/ompi), neither
   ompi-trunk, nor ompi-ddt branch would successfully launch.

This commit was SVN r21641.

											
										
										
											2009-07-13 08:56:31 +04:00
+								    struct opal_convertor_t* convertor,
-												Adding send_immediate (sendi) implementation to openib btl.

This commit was SVN r20881.

											
										
										
											2009-03-25 19:53:26 +03:00
+								    void* header,
 								    size_t header_size,
 								    size_t payload_size,
 								    uint8_t order,
 								    uint32_t flags,
 								    mca_btl_base_tag_t tag,
 								    mca_btl_base_descriptor_t** descriptor
 								);
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								/**
 								 * PML->BTL Initiate a put of the specified size.
 								 *
 								 * @param btl (IN)               BTL instance
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
+								 * @param btl_peer (IN)          BTL peer addressing
 								 * @param descriptor (IN)        Descriptor of data to be transmitted.
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
+								 */
 								extern int mca_btl_openib_put(
 								    struct mca_btl_base_module_t* btl,
 								    struct mca_btl_base_endpoint_t* btl_peer,
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
+								    struct mca_btl_base_descriptor_t* descriptor
-												Added support for openib RDMA READ.. note that performance is currently an
issue so PUT is default.. We are determining if this is an openib issue or a
btl issue as we have seen performance increases on mvapi. 

This commit was SVN r6928.

											
										
										
											2005-08-18 21:08:27 +04:00
+								    );
 								/**
 								 * PML->BTL Initiate a get of the specified size.
 								 *
 								 * @param btl (IN)               BTL instance
 								 * @param btl_base_peer (IN)     BTL peer addressing
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
+								 * @param descriptor (IN)        Descriptor of data to be transmitted.
-												Added support for openib RDMA READ.. note that performance is currently an
issue so PUT is default.. We are determining if this is an openib issue or a
btl issue as we have seen performance increases on mvapi. 

This commit was SVN r6928.

											
										
										
											2005-08-18 21:08:27 +04:00
+								 */
 								extern int mca_btl_openib_get(
 								    struct mca_btl_base_module_t* btl,
 								    struct mca_btl_base_endpoint_t* btl_peer,
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
+								    struct mca_btl_base_descriptor_t* descriptor
-												Added support for openib RDMA READ.. note that performance is currently an
issue so PUT is default.. We are determining if this is an openib issue or a
btl issue as we have seen performance increases on mvapi. 

This commit was SVN r6928.

											
										
										
											2005-08-18 21:08:27 +04:00
+								    );
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
 								/**
 								 * Allocate a descriptor.
 								 *
 								 * @param btl (IN)      BTL module
 								 * @param size (IN)     Requested descriptor size.
 								 */
 								extern mca_btl_base_descriptor_t* mca_btl_openib_alloc(
-												Add endpoint parameter to btl_alloc() function. Enables various optimizations
inside BTL.

This commit was SVN r16898.

											
										
										
											2007-12-09 17:00:42 +03:00
+								        struct mca_btl_base_module_t* btl,
 								        struct mca_btl_base_endpoint_t* endpoint,
 								        uint8_t order,
-												Add flags parameter to btl_alloc() and btl_prepare_src() functions. If BTL
knows at the time of allocation priority of a descriptor it may do some
optimizations.

This commit was SVN r16901.

											
										
										
											2007-12-09 17:08:01 +03:00
+								        size_t size,
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								        uint32_t flags);
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
 								/**
 								 * Return a segment allocated by this BTL.
 								 *
 								 * @param btl (IN)         BTL module
 								 * @param descriptor (IN)  Allocated descriptor.
 								 */
 								extern int mca_btl_openib_free(
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								                               struct mca_btl_base_module_t* btl,
 								                               mca_btl_base_descriptor_t* des);
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
 								/**
 								 * Pack data and return a descriptor that can be
 								 * used for send/put.
 								 *
 								 * @param btl (IN)      BTL module
 								 * @param peer (IN)     BTL peer addressing
 								 */
 								mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
-												Initial commit of changes to the mvapi btl to the openib btl. Still need to
work on the configure.stub to correctly locate the ib libraries. 

This commit was SVN r6435.

											
										
										
											2005-07-12 17:38:54 +04:00
+								                                                      struct mca_btl_base_module_t* btl,
 								                                                      struct mca_btl_base_endpoint_t* peer,
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								                                                      mca_mpool_base_registration_t* registration,
- - Split the datatype engine into two parts: an MPI specific part in
   OMPI
   and a language agnostic part in OPAL. The convertor is completely
   moved into OPAL.  This offers several benefits as described in RFC
   http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
   namely:
    - Fewer basic types (int* and float* types, boolean and wchar
    - Fixing naming scheme to ompi-nomenclature.
    - Usability outside of the ompi-layer.
 - Due to the fixed nature of simple opal types, their information is
   completely
   known at compile time and therefore constified
 - With fewer datatypes (22), the actual sizes of bit-field types may be
   reduced
   from 64 to 32 bits, allowing reorganizing the opal_datatype
   structure, eliminating holes and keeping data required in convertor
   (upon send/recv) in one cacheline...
   This has implications to the convertor-datastructure and other parts
   of the code.
 - Several performance tests have been run, the netpipe latency does not
   change with
   this patch on Linux/x86-64 on the smoky cluster.
 - Extensive tests have been done to verify correctness (no new
   regressions) using:
   1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
    ompi-ddt:
    a. running both trunk and ompi-ddt resulted in no differences
       (except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
       correctly).
    b. with --enable-memchecker and running under valgrind (one buglet
       when run with static found in test-suite, committed)
   2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
      all passed (except for the dynamic/ tests failed!! as trunk/MTT)
   3. compilation and usage of HDF5 tests on Jaguar using PGI and
      PathScale compilers.
   4. compilation and usage on Scicortex.
 - Please note, that for the heterogeneous case, (-m32 compiled
   binaries/ompi), neither
   ompi-trunk, nor ompi-ddt branch would successfully launch.

This commit was SVN r21641.

											
										
										
											2009-07-13 08:56:31 +04:00
+								                                                      struct opal_convertor_t* convertor,
-												Add optional ordering to the BTL interface. 
This is required to tighten up the BTL semantics. Ordering is not guaranteed,
but, if the BTL returns a order tag in a descriptor (other than
MCA_BTL_NO_ORDER) then we may request another descriptor that will obey
ordering w.r.t. the other descriptor.


This will allow sane behavior for RDMA networks, where local completion of an
RDMA operation on the active side does not imply remote completion on the
passive side. If we send a FIN message after local completion and the FIN is
not ordered w.r.t. the RDMA operation then badness may occur as the passive
side may now try to deregister the memory and the RDMA operation may still be
pending on the passive side. 

Note that this has no impact on networks that don't suffer from this
limitation as the ORDER tag can simply always be specified as
MCA_BTL_NO_ORDER.

This commit was SVN r14768.

											
										
										
											2007-05-24 23:51:26 +04:00
+								                                                      uint8_t order,
-												Initial commit of changes to the mvapi btl to the openib btl. Still need to
work on the configure.stub to correctly locate the ib libraries. 

This commit was SVN r6435.

											
										
										
											2005-07-12 17:38:54 +04:00
+								                                                      size_t reserve,
-												Add flags parameter to btl_alloc() and btl_prepare_src() functions. If BTL
knows at the time of allocation priority of a descriptor it may do some
optimizations.

This commit was SVN r16901.

											
										
										
											2007-12-09 17:08:01 +03:00
+								                                                      size_t* size,
 								                                                      uint32_t flags
-												Initial commit of changes to the mvapi btl to the openib btl. Still need to
work on the configure.stub to correctly locate the ib libraries. 

This commit was SVN r6435.

											
										
										
											2005-07-12 17:38:54 +04:00
+								                                                      );
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
 								/**
 								 * Allocate a descriptor initialized for RDMA write.
 								 *
 								 * @param btl (IN)      BTL module
 								 * @param peer (IN)     BTL peer addressing
 								 */
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								extern mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
 								                                                             struct mca_btl_base_module_t* btl,
-												Initial commit of changes to the mvapi btl to the openib btl. Still need to
work on the configure.stub to correctly locate the ib libraries. 

This commit was SVN r6435.

											
										
										
											2005-07-12 17:38:54 +04:00
+								                                                             struct mca_btl_base_endpoint_t* peer,
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								                                                             mca_mpool_base_registration_t* registration,
- - Split the datatype engine into two parts: an MPI specific part in
   OMPI
   and a language agnostic part in OPAL. The convertor is completely
   moved into OPAL.  This offers several benefits as described in RFC
   http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
   namely:
    - Fewer basic types (int* and float* types, boolean and wchar
    - Fixing naming scheme to ompi-nomenclature.
    - Usability outside of the ompi-layer.
 - Due to the fixed nature of simple opal types, their information is
   completely
   known at compile time and therefore constified
 - With fewer datatypes (22), the actual sizes of bit-field types may be
   reduced
   from 64 to 32 bits, allowing reorganizing the opal_datatype
   structure, eliminating holes and keeping data required in convertor
   (upon send/recv) in one cacheline...
   This has implications to the convertor-datastructure and other parts
   of the code.
 - Several performance tests have been run, the netpipe latency does not
   change with
   this patch on Linux/x86-64 on the smoky cluster.
 - Extensive tests have been done to verify correctness (no new
   regressions) using:
   1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
    ompi-ddt:
    a. running both trunk and ompi-ddt resulted in no differences
       (except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
       correctly).
    b. with --enable-memchecker and running under valgrind (one buglet
       when run with static found in test-suite, committed)
   2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
      all passed (except for the dynamic/ tests failed!! as trunk/MTT)
   3. compilation and usage of HDF5 tests on Jaguar using PGI and
      PathScale compilers.
   4. compilation and usage on Scicortex.
 - Please note, that for the heterogeneous case, (-m32 compiled
   binaries/ompi), neither
   ompi-trunk, nor ompi-ddt branch would successfully launch.

This commit was SVN r21641.

											
										
										
											2009-07-13 08:56:31 +04:00
+								                                                             struct opal_convertor_t* convertor,
-												Add optional ordering to the BTL interface. 
This is required to tighten up the BTL semantics. Ordering is not guaranteed,
but, if the BTL returns a order tag in a descriptor (other than
MCA_BTL_NO_ORDER) then we may request another descriptor that will obey
ordering w.r.t. the other descriptor.


This will allow sane behavior for RDMA networks, where local completion of an
RDMA operation on the active side does not imply remote completion on the
passive side. If we send a FIN message after local completion and the FIN is
not ordered w.r.t. the RDMA operation then badness may occur as the passive
side may now try to deregister the memory and the RDMA operation may still be
pending on the passive side. 

Note that this has no impact on networks that don't suffer from this
limitation as the ORDER tag can simply always be specified as
MCA_BTL_NO_ORDER.

This commit was SVN r14768.

											
										
										
											2007-05-24 23:51:26 +04:00
+								                                                             uint8_t order,
-												Initial commit of changes to the mvapi btl to the openib btl. Still need to
work on the configure.stub to correctly locate the ib libraries. 

This commit was SVN r6435.

											
										
										
											2005-07-12 17:38:54 +04:00
+								                                                             size_t reserve,
-												Add flags parameter to btl_alloc() and btl_prepare_src() functions. If BTL
knows at the time of allocation priority of a descriptor it may do some
optimizations.

This commit was SVN r16901.

											
										
										
											2007-12-09 17:08:01 +03:00
+								                                                             size_t* size,
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								                                                             uint32_t flags);
-												removed the ib ptl and fixed a few conflicts in my previous commit

This commit was SVN r6244.

											
										
										
											2005-07-01 01:28:35 +04:00
-												Process pending put/get frags on endpoint connection establishment.

This commit was SVN r16785.

											
										
										
											2007-11-28 10:16:52 +03:00
+								extern void mca_btl_openib_frag_progress_pending_put_get(
 								        struct mca_btl_base_endpoint_t*, const int);
-												Merging in the jjhursey-ft-cr-stable branch (r13912 : HEAD).

This merge adds Checkpoint/Restart support to Open MPI. The initial
frameworks and components support a LAM/MPI-like implementation.

This commit follows the risk assessment presented to the Open MPI core
development group on Feb. 22, 2007.

This commit closes trac:158

More details to follow.

This commit was SVN r14051.

The following SVN revisions from the original message are invalid or
inconsistent and therefore were not cross-referenced:
  r13912

The following Trac tickets were found above:
  Ticket 158 --> https://svn.open-mpi.org/trac/ompi/ticket/158

											
										
										
											2007-03-17 02:11:45 +03:00
+								/**
 								 * Fault Tolerance Event Notification Function
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
+								 *
 								 * @param state (IN)  Checkpoint State
-												Merging in the jjhursey-ft-cr-stable branch (r13912 : HEAD).

This merge adds Checkpoint/Restart support to Open MPI. The initial
frameworks and components support a LAM/MPI-like implementation.

This commit follows the risk assessment presented to the Open MPI core
development group on Feb. 22, 2007.

This commit closes trac:158

More details to follow.

This commit was SVN r14051.

The following SVN revisions from the original message are invalid or
inconsistent and therefore were not cross-referenced:
  r13912

The following Trac tickets were found above:
  Ticket 158 --> https://svn.open-mpi.org/trac/ompi/ticket/158

											
										
										
											2007-03-17 02:11:45 +03:00
+								 * @return OMPI_SUCCESS or failure status
 								 */
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
+								extern int mca_btl_openib_ft_event(int state);
-												Merging in the jjhursey-ft-cr-stable branch (r13912 : HEAD).

This merge adds Checkpoint/Restart support to Open MPI. The initial
frameworks and components support a LAM/MPI-like implementation.

This commit follows the risk assessment presented to the Open MPI core
development group on Feb. 22, 2007.

This commit closes trac:158

More details to follow.

This commit was SVN r14051.

The following SVN revisions from the original message are invalid or
inconsistent and therefore were not cross-referenced:
  r13912

The following Trac tickets were found above:
  Ticket 158 --> https://svn.open-mpi.org/trac/ompi/ticket/158

											
										
										
											2007-03-17 02:11:45 +03:00
-												Also show the "you might not have enough registered memory" warning
message earlier in the openib BTL startup sequence

This commit was SVN r21469.

											
										
										
											2009-06-18 16:24:39 +04:00
+								/**
 								 * Show an error during init, particularly when running out of
 								 * registered memory.
 								 */
 								void mca_btl_openib_show_init_error(const char *file, int line,
 								                                    const char *func, const char *dev);
-												This commit brings in two major things:

1. Galen's fine-grain control of queue pair resources in the openib
   BTL.
1. Pasha's new implementation of asychronous HCA event handling.

Pasha's new implementation doesn't take much explanation, but the new
"multifrag" stuff does.  

Note that "svn merge" was not used to bring this new code from the
/tmp/ib_multifrag branch -- something Bad happened in the periodic
trunk pulls on that branch making an actual merge back to the trunk
effectively impossible (i.e., lots and lots of arbitrary conflicts and
artificial changes).  :-(

== Fine-grain control of queue pair resources ==

Galen's fine-grain control of queue pair resources to the OpenIB BTL
(thanks to Gleb for fixing broken code and providing additional
functionality, Pasha for finding broken code, and Jeff for doing all
the svn work and regression testing).

Prior to this commit, the OpenIB BTL created two queue pairs: one for
eager size fragments and one for max send size fragments.  When the
use of the shared receive queue (SRQ) was specified (via "-mca
btl_openib_use_srq 1"), these QPs would use a shared receive queue for
receive buffers instead of the default per-peer (PP) receive queues
and buffers.  One consequence of this design is that receive buffer
utilization (the size of the data received as a percentage of the
receive buffer used for the data) was quite poor for a number of
applications.

The new design allows multiple QPs to be specified at runtime.  Each
QP can be setup to use PP or SRQ receive buffers as well as giving
fine-grained control over receive buffer size, number of receive
buffers to post, when to replenish the receive queue (low water mark)
and for SRQ QPs, the number of outstanding sends can also be
specified.  The following is an example of the syntax to describe QPs
to the OpenIB BTL using the new MCA parameter btl_openib_receive_queues:

{{{
-mca btl_openib_receive_queues \
     "P,128,16,4;S,1024,256,128,32;S,4096,256,128,32;S,65536,256,128,32"
}}}

Each QP description is delimited by ";" (semicolon) with individual
fields of the QP description delimited by "," (comma).  The above
example therefore describes 4 QPs.

The first QP is:

    P,128,16,4

Meaning: per-peer receive buffer QPs are indicated by a starting field
of "P"; the first QP (shown above) is therefore a per-peer based QP.
The second field indicates the size of the receive buffer in bytes
(128 bytes).  The third field indicates the number of receive buffers
to allocate to the QP (16).  The fourth field indicates the low
watermark for receive buffers at which time the BTL will repost
receive buffers to the QP (4).

The second QP is:

    S,1024,256,128,32

Shared receive queue based QPs are indicated by a starting field of
"S"; the second QP (shown above) is therefore a shared receive queue
based QP.  The second, third and fourth fields are the same as in the
per-peer based QP.  The fifth field is the number of outstanding sends
that are allowed at a given time on the QP (32).  This provides a
"good enough" mechanism of flow control for some regular communication
patterns.

QPs MUST be specified in ascending receive buffer size order.  This
requirement may be removed prior to 1.3 release.

This commit was SVN r15474.

											
										
										
											2007-07-18 05:15:59 +04:00
+								#define BTL_OPENIB_HP_CQ 0
 								#define BTL_OPENIB_LP_CQ 1
-												consolidate part of HP/LP fields.

This commit was SVN r11528.

											
										
										
											2006-09-05 20:00:18 +04:00
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
+								/**
-												Remove trailing whitespaces. No code changes in this commit.

This commit was SVN r17167.

											
										
										
											2008-01-21 15:11:18 +03:00
+								 * Post to Shared Receive Queue with certain priority
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
+								 *
 								 * @param openib_btl (IN) BTL module
 								 * @param qp (IN)         Index of the queue pair whose shared receive queue is posted to
 								 * @return OMPI_SUCCESS or failure status
 								 */
-												Uninline mca_btl_openib_post_srr() function.

This commit was SVN r16797.

											
										
										
											2007-11-28 17:52:31 +03:00
+								int mca_btl_openib_post_srr(mca_btl_openib_module_t* openib_btl, const int qp);
-												Consolidate receive buffers prepost code for HP/LP QPs.

This commit was SVN r11552.

											
										
										
											2006-09-07 17:05:41 +04:00
-												Improving support for non homogeneous OpenFabrics network configurations

This commit was SVN r22312.

											
										
										
											2009-12-15 17:25:07 +03:00
+								/**
 								 * Get a transport name of btl by its transport type.
 								 */
 								const char* btl_openib_get_transport_name(mca_btl_openib_transport_type_t transport_type);
 								/**
 								 * Get a transport type of btl.
 								 */
 								mca_btl_openib_transport_type_t mca_btl_openib_get_transport_type(mca_btl_openib_module_t* openib_btl);
-												Make xrc use srq_qp unions instead of the xrc_qp which is exactly like srq_qp.

This commit was SVN r16789.

											
										
										
											2007-11-28 10:20:26 +03:00
+								static inline int qp_cq_prio(const int qp)
-												Initial XRC support by Mellanox.

This commit was SVN r16787.

											
										
										
											2007-11-28 10:18:59 +03:00
+								{
-												Make xrc use srq_qp unions instead of the xrc_qp which is exactly like srq_qp.

This commit was SVN r16789.

											
										
										
											2007-11-28 10:20:26 +03:00
+								    if(0 == qp)
 								        return BTL_OPENIB_HP_CQ; /* smallest qp is always HP */
 								    /* If the size for this qp is <= the eager limit, make it a
 								       high priority QP.  Otherwise, make it a low priority QP. */
 								    return (mca_btl_openib_component.qp_infos[qp].size <=
 								            mca_btl_openib_component.eager_limit) ?
 								        BTL_OPENIB_HP_CQ : BTL_OPENIB_LP_CQ;
-												Initial XRC support by Mellanox.

This commit was SVN r16787.

											
										
										
											2007-11-28 10:18:59 +03:00
+								}
-												This commit brings in two major things:

1. Galen's fine-grain control of queue pair resources in the openib
   BTL.
1. Pasha's new implementation of asynchronous HCA event handling.

Pasha's new implementation doesn't take much explanation, but the new
"multifrag" stuff does.  

Note that "svn merge" was not used to bring this new code from the
/tmp/ib_multifrag branch -- something Bad happened in the periodic
trunk pulls on that branch making an actual merge back to the trunk
effectively impossible (i.e., lots and lots of arbitrary conflicts and
artifical changes).  :-(

== Fine-grain control of queue pair resources ==

Galen's fine-grain control of queue pair resources to the OpenIB BTL
(thanks to Gleb for fixing broken code and providing additional
functionality, Pasha for finding broken code, and Jeff for doing all
the svn work and regression testing).

Prior to this commit, the OpenIB BTL created two queue pairs: one for
eager size fragments and one for max send size fragments.  When the
use of the shared receive queue (SRQ) was specified (via "-mca
btl_openib_use_srq 1"), these QPs would use a shared receive queue for
receive buffers instead of the default per-peer (PP) receive queues
and buffers.  One consequence of this design is that receive buffer
utilization (the size of the data received as a percentage of the
receive buffer used for the data) was quite poor for a number of
applications.

The new design allows multiple QPs to be specified at runtime.  Each
QP can be setup to use PP or SRQ receive buffers as well as giving
fine-grained control over receive buffer size, number of receive
buffers to post, when to replenish the receive queue (low water mark)
and for SRQ QPs, the number of outstanding sends can also be
specified.  The following is an example of the syntax to describe QPs
to the OpenIB BTL using the new MCA parameter btl_openib_receive_queues:

{{{
-mca btl_openib_receive_queues \
     "P,128,16,4;S,1024,256,128,32;S,4096,256,128,32;S,65536,256,128,32"
}}}

Each QP description is delimited by ";" (semicolon) with individual
fields of the QP description delimited by "," (comma).  The above
example therefore describes 4 QPs.

The first QP is:

    P,128,16,4

Meaning: per-peer receive buffer QPs are indicated by a starting field
of "P"; the first QP (shown above) is therefore a per-peer based QP.
The second field indicates the size of the receive buffer in bytes
(128 bytes).  The third field indicates the number of receive buffers
to allocate to the QP (16).  The fourth field indicates the low
watermark for receive buffers at which time the BTL will repost
receive buffers to the QP (4).

The second QP is:

    S,1024,256,128,32

Shared receive queue based QPs are indicated by a starting field of
"S"; the second QP (shown above) is therefore a shared receive queue
based QP.  The second, third and fourth fields are the same as in the
per-peer based QP.  The fifth field is the number of outstanding sends
that are allowed at a given time on the QP (32).  This provides a
"good enough" mechanism of flow control for some regular communication
patterns.

QPs MUST be specified in ascending receive buffer size order.  This
requirement may be removed prior to 1.3 release.

This commit was SVN r15474.

											
										
										
											2007-07-18 05:15:59 +04:00
+								#define BTL_OPENIB_RDMA_QP(QP) \
 								    ((QP) == mca_btl_openib_component.rdma_qp)
-												Bring over the functionality from the /tmp/jnysal-openib-wireup
branch:

 * Support btl_openib_if_include and btl_openib_if_exclude MCA
   parameters, similar to those supported by other BTLs.  Each takes a
   comma-delimited list of identifiers.  Identifiers can be HCA
   interface names (e.g., ipath0, mthca1, etc.)  or an HCA interface
   name and port numbers (e.g., ipath0:1, mthca1:2, etc.).  It is an
   error to specify both _include and _exclude.  If you specify a
   non-existent (or non-ACTIVE) HCA and/or port, you'll get a warning
   unless you disable the warning by setting the MCA parameter
   btl_openib_warn_nonexistent_if to 0.
 * Start updating to use BEGIN_C_DECLS and END_C_DECLS
 * A few other minor fixes that were picked up along the way.

This commit was SVN r15063.

											
										
										
											2007-06-14 05:59:25 +04:00
+								END_C_DECLS
-												 - Add a few comments for elements for structs, a few spelling fixes.
   No functional change.

This commit was SVN r14534.

											
										
										
											2007-04-27 01:03:38 +04:00
+								#endif /* MCA_BTL_IB_H */