204 строки
6.4 KiB
C
204 строки
6.4 KiB
C
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
/*
|
|
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
|
* All rights reserved.
|
|
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
|
* reserved.
|
|
* Copyright (c) 2016 Research Organization for Information Science
|
|
* and Technology (RIST). All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
/**
|
|
* @file
|
|
*/
|
|
|
|
#ifndef MCA_SPML_UD_MXM_H
|
|
#define MCA_SPML_UD_MXM_H
|
|
|
|
#include "oshmem_config.h"
|
|
#include "oshmem/request/request.h"
|
|
#include "oshmem/mca/spml/spml.h"
|
|
#include "oshmem/util/oshmem_util.h"
|
|
#include "oshmem/mca/spml/base/spml_base_putreq.h"
|
|
#include "oshmem/proc/proc.h"
|
|
#include "oshmem/mca/spml/base/spml_base_request.h"
|
|
#include "oshmem/mca/spml/base/spml_base_getreq.h"
|
|
|
|
#include "ompi/mca/bml/base/base.h"
|
|
#include "opal/class/opal_free_list.h"
|
|
#include "opal/class/opal_list.h"
|
|
|
|
#include "orte/runtime/orte_globals.h"
|
|
#include "oshmem/mca/memheap/base/base.h"
|
|
|
|
#include <mxm/api/mxm_api.h>
|
|
|
|
#ifndef MXM_VERSION
|
|
#define MXM_VERSION(major, minor) (((major)<<MXM_MAJOR_BIT)|((minor)<<MXM_MINOR_BIT))
|
|
#endif
|
|
|
|
#define MXM_SHMEM_MQ_ID 0x7119
|
|
|
|
/* start request explicit ack once our buffer pool is less than watermark */
|
|
#define SPML_IKRIT_PUT_LOW_WATER 16
|
|
/* request explicit ack (SYNC) per every X put requests per connection */
|
|
#define SPML_IKRIT_PACKETS_PER_SYNC 64
|
|
|
|
#define spml_ikrit_container_of(ptr, type, member) ( \
|
|
(type *)( ((char *)(ptr)) - offsetof(type,member) ))
|
|
|
|
#define MXM_MAX_ADDR_LEN 512
|
|
|
|
#define MXM_PTL_RDMA 0
|
|
#define MXM_PTL_SHM 1
|
|
#define MXM_PTL_LAST 2
|
|
|
|
BEGIN_C_DECLS
|
|
|
|
/**
|
|
* MXM SPML module
|
|
*/
|
|
/* TODO: move va_xx to base struct */
|
|
struct spml_ikrit_mkey {
|
|
mkey_segment_t super;
|
|
mxm_mem_key_t key;
|
|
};
|
|
typedef struct spml_ikrit_mkey spml_ikrit_mkey_t;
|
|
|
|
struct mxm_peer {
|
|
mxm_conn_h mxm_conn;
|
|
mxm_conn_h mxm_hw_rdma_conn;
|
|
uint8_t ptl_id;
|
|
uint8_t need_fence;
|
|
int32_t n_active_puts;
|
|
opal_list_item_t link;
|
|
spml_ikrit_mkey_t mkeys[MCA_MEMHEAP_SEG_COUNT];
|
|
};
|
|
|
|
typedef struct mxm_peer mxm_peer_t;
|
|
|
|
struct mca_spml_ikrit_t {
|
|
mca_spml_base_module_t super;
|
|
|
|
mxm_context_opts_t *mxm_ctx_opts;
|
|
mxm_ep_opts_t *mxm_ep_opts;
|
|
mxm_ep_opts_t *mxm_ep_hw_rdma_opts;
|
|
mxm_h mxm_context;
|
|
mxm_ep_h mxm_ep;
|
|
mxm_ep_h mxm_hw_rdma_ep;
|
|
mxm_mq_h mxm_mq;
|
|
mxm_peer_t *mxm_peers;
|
|
|
|
int32_t n_active_puts;
|
|
int32_t n_active_gets;
|
|
int32_t n_mxm_fences;
|
|
|
|
int priority; /* component priority */
|
|
int free_list_num; /* initial size of free list */
|
|
int free_list_max; /* maximum size of free list */
|
|
int free_list_inc; /* number of elements to grow free list */
|
|
int bulk_connect; /* use bulk connect */
|
|
int bulk_disconnect; /* use bulk disconnect */
|
|
|
|
bool enabled;
|
|
opal_list_t active_peers;
|
|
int n_relays; /* number of procs/node serving as relays */
|
|
|
|
char *mxm_tls;
|
|
int ud_only; /* only ud transport is used. In this case
|
|
it is possible to speedup mkey exchange
|
|
and not to register memheap */
|
|
int hw_rdma_channel; /* true if we provide separate channel that
|
|
has true one sided capability */
|
|
int np;
|
|
int unsync_conn_max;
|
|
size_t put_zcopy_threshold; /* enable zcopy in put if message size is
|
|
greater than the threshold */
|
|
};
|
|
|
|
typedef struct mca_spml_ikrit_t mca_spml_ikrit_t;
|
|
|
|
|
|
typedef struct spml_ikrit_mxm_ep_conn_info_t {
|
|
union {
|
|
struct sockaddr_storage ptl_addr[MXM_PTL_LAST];
|
|
char ep_addr[MXM_MAX_ADDR_LEN];
|
|
} addr;
|
|
} spml_ikrit_mxm_ep_conn_info_t;
|
|
|
|
extern mca_spml_ikrit_t mca_spml_ikrit;
|
|
|
|
extern int mca_spml_ikrit_enable(bool enable);
|
|
extern int mca_spml_ikrit_get(void* dst_addr,
|
|
size_t size,
|
|
void* src_addr,
|
|
int src);
|
|
extern int mca_spml_ikrit_get_nb(void* src_addr,
|
|
size_t size,
|
|
void* dst_addr,
|
|
int src,
|
|
void **handle);
|
|
|
|
extern int mca_spml_ikrit_put(void* dst_addr,
|
|
size_t size,
|
|
void* src_addr,
|
|
int dst);
|
|
extern int mca_spml_ikrit_put_nb(void* dst_addr,
|
|
size_t size,
|
|
void* src_addr,
|
|
int dst,
|
|
void **handle);
|
|
|
|
extern int mca_spml_ikrit_recv(void* buf, size_t size, int src);
|
|
extern int mca_spml_ikrit_send(void* buf,
|
|
size_t size,
|
|
int dst,
|
|
mca_spml_base_put_mode_t mode);
|
|
|
|
extern sshmem_mkey_t *mca_spml_ikrit_register(void* addr,
|
|
size_t size,
|
|
uint64_t shmid,
|
|
int *count);
|
|
extern int mca_spml_ikrit_deregister(sshmem_mkey_t *mkeys);
|
|
extern int mca_spml_ikrit_oob_get_mkeys(int pe,
|
|
uint32_t segno,
|
|
sshmem_mkey_t *mkeys);
|
|
|
|
extern int mca_spml_ikrit_add_procs(ompi_proc_t** procs, size_t nprocs);
|
|
extern int mca_spml_ikrit_del_procs(ompi_proc_t** procs, size_t nprocs);
|
|
extern int mca_spml_ikrit_fence(void);
|
|
extern int spml_ikrit_progress(void);
|
|
|
|
mxm_mem_key_t *mca_spml_ikrit_get_mkey_slow(int pe, void *va, int ptl_id, void **rva);
|
|
|
|
/* the functionreturns NULL if data can be directly copied via shared memory
|
|
* else it returns mxm mem key
|
|
*
|
|
* the function will abort() if va is not symmetric var address.
|
|
*/
|
|
static inline mxm_mem_key_t *mca_spml_ikrit_get_mkey(int pe, void *va, int ptl_id, void **rva)
|
|
{
|
|
spml_ikrit_mkey_t *mkey;
|
|
|
|
if (OPAL_UNLIKELY(MXM_PTL_RDMA != ptl_id)) {
|
|
return mca_spml_ikrit_get_mkey_slow(pe, va, ptl_id, rva);
|
|
}
|
|
|
|
mkey = mca_spml_ikrit.mxm_peers[pe].mkeys;
|
|
mkey = (spml_ikrit_mkey_t *)map_segment_find_va(&mkey->super.super, sizeof(*mkey), va);
|
|
if (OPAL_UNLIKELY(NULL == mkey)) {
|
|
return mca_spml_ikrit_get_mkey_slow(pe, va, ptl_id, rva);
|
|
}
|
|
*rva = map_segment_va2rva(&mkey->super, va);
|
|
return &mkey->key;
|
|
}
|
|
|
|
END_C_DECLS
|
|
|
|
#endif
|
|
|