94593ca20b
modified: ../orte/mca/rml/base/rml_base_frame.c modified: ../orte/mca/rml/base/rml_base_stubs.c deleted: ../orte/mca/rml/ofi/.opal_ignore modified: ../orte/mca/rml/ofi/Makefile.am modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_send.c modified: ../orte/test/system/ofi_conduit_stress.c Removed stale include directive modified: ../orte/mca/rml/ofi/Makefile.am The ofi plugin supports multiple providers, and identifies them by ofi_prov_id, changed the previous name conduit_id to ofi_prov_id modified: ../orte/mca/rml/base/base.h modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_request.h modified: ../orte/mca/rml/ofi/rml_ofi_send.c Adding ofi plugin to allow for opening a conduit to use ethernet/fabric. modified: ../orte/mca/rml/base/rml_base_frame.c modified: ../orte/mca/rml/base/rml_base_stubs.c deleted: ../orte/mca/rml/ofi/.opal_ignore modified: ../orte/mca/rml/ofi/Makefile.am modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_send.c modified: ../orte/test/system/ofi_conduit_stress.c Removed stale include directive modified: ../orte/mca/rml/ofi/Makefile.am The ofi plugin supports multiple providers, and identifies them by ofi_prov_id, changed the previous name conduit_id to ofi_prov_id modified: ../orte/mca/rml/base/base.h modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_request.h modified: ../orte/mca/rml/ofi/rml_ofi_send.c Fixed merge issues, and minor pull-request comments modified: ../orte/mca/rml/base/base.h modified: ../orte/mca/rml/base/rml_base_frame.c modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c Adding ofi plugin to allow for opening a conduit to use ethernet/fabric. modified: ../orte/mca/rml/base/rml_base_frame.c modified: ../orte/mca/rml/base/rml_base_stubs.c deleted: ../orte/mca/rml/ofi/.opal_ignore modified: ../orte/mca/rml/ofi/Makefile.am modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_send.c modified: ../orte/test/system/ofi_conduit_stress.c Removed stale include directive modified: ../orte/mca/rml/ofi/Makefile.am The ofi plugin supports multiple providers, and identifies them by ofi_prov_id, changed the previous name conduit_id to ofi_prov_id modified: ../orte/mca/rml/base/base.h modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_request.h modified: ../orte/mca/rml/ofi/rml_ofi_send.c Adding ofi plugin to allow for opening a conduit to use ethernet/fabric. modified: ../orte/mca/rml/base/rml_base_frame.c modified: ../orte/mca/rml/base/rml_base_stubs.c deleted: ../orte/mca/rml/ofi/.opal_ignore modified: ../orte/mca/rml/ofi/Makefile.am modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_send.c modified: ../orte/test/system/ofi_conduit_stress.c Removed stale include directive modified: ../orte/mca/rml/ofi/Makefile.am Fixed merge issues, and minor pull-request comments modified: ../orte/mca/rml/base/base.h modified: ../orte/mca/rml/base/rml_base_frame.c modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c Removed trailing space modified: ../orte/mca/rml/ofi/rml_ofi_component.c Cleaned up test- ofi_conduit_stress.c modified: ../orte/test/system/ofi_conduit_stress.c cleaned up printing the provider info during initialisation modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c Signed-off-by: Anandhi S Jayakumar <anandhi.s.jayakumar@intel.com> Fixing warnings modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_send.c Signed-off-by: Anandhi S Jayakumar <anandhi.s.jayakumar@intel.com> minor cleanup modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_send.c Signed-off-by: Anandhi S Jayakumar <anandhi.s.jayakumar@intel.com> more cleanup modified: ../orte/mca/rml/ofi/rml_ofi_component.c Signed-off-by: Anandhi S Jayakumar <anandhi.s.jayakumar@intel.com> Sending the ethernet address only in the get_contact_info, rest will be sent through modex modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c Signed-off-by: Anandhi S Jayakumar <anandhi.s.jayakumar@intel.com> Adding error logging on failures modified: ../orte/mca/rml/ofi/rml_ofi_component.c Signed-off-by: Anandhi S Jayakumar <anandhi.s.jayakumar@intel.com> Handling the OPAL_MODEX_SEND/RECV generically for all ofi providers. modified: ../orte/mca/rml/ofi/rml_ofi.h modified: ../orte/mca/rml/ofi/rml_ofi_component.c modified: ../orte/mca/rml/ofi/rml_ofi_send.c Signed-off-by: Anandhi S Jayakumar <anandhi.s.jayakumar@intel.com> Adding to build ofi for limited people new file: ../orte/mca/rml/ofi/.opal_ignore new file: ../orte/mca/rml/ofi/.opal_unignore Signed-off-by: Anandhi S Jayakumar <anandhi.s.jayakumar@intel.com> Removign the error logging for now modified: ../orte/mca/rml/ofi/rml_ofi_component.c
206 строки
6.4 KiB
C
206 строки
6.4 KiB
C
/*
|
|
* Copyright (c) 2015 Intel, Inc. All rights reserved
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#ifndef MCA_RML_OFI_RML_OFI_H
|
|
#define MCA_RML_OFI_RML_OFI_H
|
|
|
|
#include "orte_config.h"
|
|
|
|
#include "opal/dss/dss_types.h"
|
|
#include "opal/mca/event/event.h"
|
|
#include "opal/mca/pmix/pmix.h"
|
|
#include "orte/mca/rml/base/base.h"
|
|
|
|
#include <rdma/fabric.h>
|
|
#include <rdma/fi_cm.h>
|
|
#include <rdma/fi_domain.h>
|
|
#include <rdma/fi_endpoint.h>
|
|
#include <rdma/fi_errno.h>
|
|
#include <rdma/fi_tagged.h>
|
|
|
|
#include "rml_ofi_request.h"
|
|
|
|
/** the maximum open OFI ofi_prov - assuming system will have no more than 20 transports*/
|
|
#define MAX_OFI_PROVIDERS 40
|
|
#define RML_OFI_PROV_ID_INVALID 0xFF
|
|
|
|
/** RML/OFI key values **/
|
|
/* (char*) ofi socket address (type IN) of the node process is running on */
|
|
#define OPAL_RML_OFI_FI_SOCKADDR_IN "rml.ofi.fisockaddrin"
|
|
/* (char*) ofi socket address (type PSM) of the node process is running on */
|
|
#define OPAL_RML_OFI_FI_ADDR_PSMX "rml.ofi.fiaddrpsmx"
|
|
|
|
// MULTI_BUF_SIZE_FACTOR defines how large the multi recv buffer will be.
|
|
// In order to use FI_MULTI_RECV feature efficiently, we need to have a
|
|
// large recv buffer so that we don't need to repost the buffer often to
|
|
// get the remaining data when the buffer is full
|
|
#define MULTI_BUF_SIZE_FACTOR 128
|
|
#define MIN_MULTI_BUF_SIZE (1024 * 1024)
|
|
|
|
#define OFIADDR "ofiaddr"
|
|
|
|
#define CLOSE_FID(fd) \
|
|
do { \
|
|
int _ret = 0; \
|
|
if (0 != (fd)) { \
|
|
_ret = fi_close(&(fd)->fid); \
|
|
fd = NULL; \
|
|
if (0 != _ret) { \
|
|
opal_output_verbose(10,orte_rml_base_framework.framework_output, \
|
|
" %s - fi_close failed with error- %d", \
|
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ret); \
|
|
} \
|
|
} \
|
|
} while (0);
|
|
|
|
|
|
#define RML_OFI_RETRY_UNTIL_DONE(FUNC) \
|
|
do { \
|
|
do { \
|
|
ret = FUNC; \
|
|
if(OPAL_LIKELY(0 == ret)) {break;} \
|
|
} while(-FI_EAGAIN == ret); \
|
|
} while(0);
|
|
|
|
BEGIN_C_DECLS
|
|
|
|
struct orte_rml_ofi_module_t;
|
|
|
|
/** This structure will hold the ep and all ofi objects for each transport
|
|
and also the corresponding fi_info
|
|
**/
|
|
typedef struct {
|
|
|
|
/** ofi provider ID **/
|
|
uint8_t ofi_prov_id;
|
|
|
|
/** fi_info for this transport */
|
|
struct fi_info *fabric_info;
|
|
|
|
/** Fabric Domain handle */
|
|
struct fid_fabric *fabric;
|
|
|
|
/** Access Domain handle */
|
|
struct fid_domain *domain;
|
|
|
|
/** Address vector handle */
|
|
struct fid_av *av;
|
|
|
|
/** Completion queue handle */
|
|
struct fid_cq *cq;
|
|
|
|
/** Endpoint to communicate on */
|
|
struct fid_ep *ep;
|
|
|
|
/** Endpoint name */
|
|
char ep_name[FI_NAME_MAX];
|
|
|
|
/** Endpoint name length */
|
|
size_t epnamelen;
|
|
|
|
/** OFI memory region */
|
|
struct fid_mr *mr_multi_recv;
|
|
|
|
/** buffer for tx and rx */
|
|
void *rxbuf;
|
|
|
|
uint64_t rxbuf_size;
|
|
|
|
/* event,fd associated with the cq */
|
|
int fd;
|
|
|
|
/*event associated with progress fn */
|
|
opal_event_t progress_event;
|
|
bool progress_ev_active;
|
|
|
|
struct fi_context rx_ctx1;
|
|
|
|
} ofi_transport_ofi_prov_t;
|
|
|
|
|
|
struct orte_rml_ofi_module_t {
|
|
orte_rml_base_module_t api;
|
|
|
|
/** current ofi transport id the component is using, this will be initialised
|
|
** in the open_ofi_prov() call **/
|
|
int cur_transport_id;
|
|
|
|
/** Fabric info structure of all supported transports in system **/
|
|
struct fi_info *fi_info_list;
|
|
|
|
/** OFI ep and corr fi_info for all the transports (ofi_providers) **/
|
|
ofi_transport_ofi_prov_t ofi_prov[MAX_OFI_PROVIDERS];
|
|
|
|
size_t min_ofi_recv_buf_sz;
|
|
|
|
/** "Any source" address */
|
|
fi_addr_t any_addr;
|
|
|
|
/** number of ofi providers currently opened **/
|
|
uint8_t ofi_prov_open_num;
|
|
|
|
/** Unique message id for every message that is fragmented to be sent over OFI **/
|
|
uint32_t cur_msgid;
|
|
|
|
/* hashtable stores the peer addresses */
|
|
opal_hash_table_t peers;
|
|
|
|
opal_list_t recv_msg_queue_list;
|
|
opal_list_t queued_routing_messages;
|
|
opal_event_t *timer_event;
|
|
struct timeval timeout;
|
|
} ;
|
|
typedef struct orte_rml_ofi_module_t orte_rml_ofi_module_t;
|
|
|
|
typedef struct {
|
|
opal_object_t super;
|
|
void* ofi_ep;
|
|
size_t ofi_ep_len;
|
|
} orte_rml_ofi_peer_t;
|
|
OBJ_CLASS_DECLARATION(orte_rml_ofi_peer_t);
|
|
|
|
ORTE_MODULE_DECLSPEC extern orte_rml_component_t mca_rml_ofi_component;
|
|
extern orte_rml_ofi_module_t orte_rml_ofi;
|
|
|
|
int orte_rml_ofi_send_buffer_nb(struct orte_rml_base_module_t *mod,
|
|
orte_process_name_t* peer,
|
|
struct opal_buffer_t* buffer,
|
|
orte_rml_tag_t tag,
|
|
orte_rml_buffer_callback_fn_t cbfunc,
|
|
void* cbdata);
|
|
int orte_rml_ofi_send_nb(struct orte_rml_base_module_t *mod,
|
|
orte_process_name_t* peer,
|
|
struct iovec* iov,
|
|
int count,
|
|
orte_rml_tag_t tag,
|
|
orte_rml_callback_fn_t cbfunc,
|
|
void* cbdata);
|
|
|
|
/****************** INTERNAL OFI Functions*************/
|
|
void free_ofi_prov_resources( int ofi_prov_id);
|
|
void print_provider_list_info (struct fi_info *fi );
|
|
void print_provider_info (struct fi_info *cur_fi );
|
|
int cq_progress_handler(int sd, short flags, void *cbdata);
|
|
int get_ofi_prov_id( opal_list_t *attributes);
|
|
|
|
/** Send callback */
|
|
int orte_rml_ofi_send_callback(struct fi_cq_data_entry *wc,
|
|
orte_rml_ofi_request_t*);
|
|
|
|
/** Error callback */
|
|
int orte_rml_ofi_error_callback(struct fi_cq_err_entry *error,
|
|
orte_rml_ofi_request_t*);
|
|
|
|
/* OFI Recv handler */
|
|
int orte_rml_ofi_recv_handler(struct fi_cq_data_entry *wc, uint8_t ofi_prov_id);
|
|
|
|
END_C_DECLS
|
|
|
|
#endif
|