Update the BTL interface to provide a cleaner API for RDMA operations.
The old BTL interface provided support for RDMA through the use of the btl_prepare_src and btl_prepare_dst functions. These functions were expected to prepare as much of the user buffer as possible for the RDMA operation and return a descriptor. The descriptor contained segment information on the prepared region. The BTL user could then pass the RDMA segment information to a remote peer. Once the peer received that information, it packed it into a similar descriptor on the other side that could then be passed into a single btl_put or btl_get operation. Changes: - Removed the btl_prepare_dst function. This reflects the fact that RDMA operations no longer depend on "prepared" descriptors. - Removed the btl_seg_size member. There is no need for BTLs to subclass the mca_btl_base_segment_t class anymore. ... Add more
Этот коммит содержится в:
родитель
0338bc80b7
Коммит
2d381f800f
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -74,6 +75,39 @@ int mca_btl_base_param_register(mca_base_component_t *version,
|
||||
OPAL_INFO_LVL_4,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&module->btl_eager_limit);
|
||||
|
||||
if ((module->btl_flags & MCA_BTL_FLAGS_GET) && module->btl_get) {
|
||||
if (0 == module->btl_get_limit) {
|
||||
module->btl_get_limit = SIZE_MAX;
|
||||
}
|
||||
|
||||
(void) mca_base_component_var_register(version, "get_limit", "Maximum size (in bytes) for btl get",
|
||||
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, OPAL_INFO_LVL_4,
|
||||
MCA_BASE_VAR_SCOPE_READONLY, &module->btl_get_limit);
|
||||
|
||||
/* Allow the user to set the alignment. The BTL should double-check the alignment in its open
|
||||
* function. */
|
||||
(void) mca_base_component_var_register(version, "get_alignment", "Alignment required for btl get",
|
||||
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, OPAL_INFO_LVL_6,
|
||||
MCA_BASE_VAR_SCOPE_CONSTANT, &module->btl_get_alignment);
|
||||
}
|
||||
|
||||
if ((module->btl_flags & MCA_BTL_FLAGS_PUT) && module->btl_put) {
|
||||
if (0 == module->btl_put_limit) {
|
||||
module->btl_put_limit = SIZE_MAX;
|
||||
}
|
||||
(void) mca_base_component_var_register(version, "put_limit", "Maximum size (in bytes) for btl put",
|
||||
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, OPAL_INFO_LVL_4,
|
||||
MCA_BASE_VAR_SCOPE_READONLY, &module->btl_put_limit);
|
||||
|
||||
/* Allow the user to set the alignment. The BTL should double-check the alignment in its open
|
||||
* function. */
|
||||
(void) mca_base_component_var_register(version, "put_alignment", "Alignment required for btl put",
|
||||
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, OPAL_INFO_LVL_6,
|
||||
MCA_BASE_VAR_SCOPE_CONSTANT, &module->btl_put_alignment);
|
||||
}
|
||||
|
||||
|
||||
#if OPAL_CUDA_GDR_SUPPORT
|
||||
/* If no CUDA RDMA support, zero them out */
|
||||
if (!(MCA_BTL_FLAGS_CUDA_GET & module->btl_flags)) {
|
||||
@ -144,5 +178,13 @@ int mca_btl_base_param_verify(mca_btl_base_module_t *module)
|
||||
module->btl_flags &= ~MCA_BTL_FLAGS_GET;
|
||||
}
|
||||
|
||||
if (0 == module->btl_get_limit) {
|
||||
module->btl_get_limit = SIZE_MAX;
|
||||
}
|
||||
|
||||
if (0 == module->btl_put_limit) {
|
||||
module->btl_put_limit = SIZE_MAX;
|
||||
}
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
@ -134,6 +134,23 @@ struct mca_btl_base_descriptor_t;
|
||||
struct mca_mpool_base_resources_t;
|
||||
struct opal_proc_t;
|
||||
|
||||
/**
|
||||
* Opaque registration handle for executing RDMA and atomic
|
||||
* operations on a memory region.
|
||||
*
|
||||
* The data inside this handle is appropriate for passing
|
||||
* to remote peers to execute RDMA and atomic operations. The
|
||||
* size needed to send the registration handle can be
|
||||
* obtained from the btl via the btl_registration_handle_size
|
||||
* member. If this size is 0 then no registration data is
|
||||
* needed to execute RDMA or atomic operations.
|
||||
*/
|
||||
struct mca_btl_base_registration_handle_t;
|
||||
typedef struct mca_btl_base_registration_handle_t mca_btl_base_registration_handle_t;
|
||||
|
||||
|
||||
/* Wildcard endpoint for use in the register_mem function */
|
||||
#define MCA_BTL_ENDPOINT_ANY (struct mca_btl_base_endpoint_t *) -1
|
||||
|
||||
/* send/recv operations require tag matching */
|
||||
typedef uint8_t mca_btl_base_tag_t;
|
||||
@ -219,6 +236,30 @@ typedef uint8_t mca_btl_base_tag_t;
|
||||
#define MCA_BTL_ERROR_FLAGS_NONFATAL 0x2
|
||||
#define MCA_BTL_ERROR_FLAGS_ADD_CUDA_IPC 0x4
|
||||
|
||||
/** registration flags */
|
||||
enum {
|
||||
/** Allow local write on the registered region. If a region is registered
|
||||
* with this flag the registration can be used as the local handle for a
|
||||
* btl_get operation. */
|
||||
MCA_BTL_REG_FLAG_LOCAL_WRITE = 0x1,
|
||||
/** Allow remote read on the registered region. If a region is registered
|
||||
* with this flag the registration can be used as the remote handle for a
|
||||
* btl_get operation. */
|
||||
MCA_BTL_REG_FLAG_REMOTE_READ = 0x2,
|
||||
/** Allow remote write on the registered region. If a region is registered
|
||||
* with this flag the registration can be used as the remote handle for a
|
||||
* btl_put operation. */
|
||||
MCA_BTL_REG_FLAG_REMOTE_WRITE = 0x4,
|
||||
/** Allow remote atomic operations on the registered region. If a region is
|
||||
* registered with this flag the registration can be used as the remote
|
||||
* handle for a btl_atomic_op or btl_atomic_fop operation. */
|
||||
MCA_BTL_REG_FLAG_REMOTE_ATOMIC = 0x8,
|
||||
/** Allow any btl operation on the registered region. If a region is registered
|
||||
* with this flag the registration can be used as the local or remote handle for
|
||||
* any btl operation. */
|
||||
MCA_BTL_REG_FLAG_ACCESS_ANY = 0xf,
|
||||
};
|
||||
|
||||
/**
|
||||
* Asynchronous callback function on completion of an operation.
|
||||
* Completion Semantics: The descriptor can be reused or returned to the
|
||||
@ -237,6 +278,32 @@ typedef void (*mca_btl_base_completion_fn_t)(
|
||||
struct mca_btl_base_descriptor_t* descriptor,
|
||||
int status);
|
||||
|
||||
|
||||
/**
|
||||
* Asynchronous callback function on completion of an rdma or atomic operation.
|
||||
* Completion Semantics: The rdma or atomic memory operation has completed
|
||||
* remotely (i.e. it is remotely visible) and the caller is free to deregister
|
||||
* the local_handle or modify the memory in local_address.
|
||||
*
|
||||
* @param[IN] module the BTL module
|
||||
* @param[IN] endpoint the BTL endpoint
|
||||
* @param[IN] local_address local address for the operation (if any)
|
||||
* @param[IN] local_handle local handle associated with the local_address
|
||||
* @param[IN] context callback context supplied to the rdma/atomic operation
|
||||
* @param[IN] cbdata callback data supplied to the rdma/atomic operation
|
||||
* @param[IN] status status of the operation
|
||||
*
|
||||
*/
|
||||
typedef void (*mca_btl_base_rdma_completion_fn_t)(
|
||||
struct mca_btl_base_module_t* module,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
void *local_address,
|
||||
struct mca_btl_base_registration_handle_t *local_handle,
|
||||
void *context,
|
||||
void *cbdata,
|
||||
int status);
|
||||
|
||||
|
||||
/**
|
||||
* Describes a region/segment of memory that is addressable
|
||||
* by a BTL.
|
||||
@ -262,6 +329,7 @@ struct mca_btl_base_segment_t {
|
||||
};
|
||||
typedef struct mca_btl_base_segment_t mca_btl_base_segment_t;
|
||||
|
||||
|
||||
/**
|
||||
* A descriptor that holds the parameters to a send/put/get
|
||||
* operation along w/ a callback routine that is called on
|
||||
@ -329,6 +397,11 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_btl_base_descriptor_t);
|
||||
*/
|
||||
#define MCA_BTL_SEG_MAX_SIZE 256
|
||||
|
||||
/**
|
||||
* Maximum size of a BTL registration handle in bytes
|
||||
*/
|
||||
#define MCA_BTL_REG_HANDLE_MAX_SIZE 256
|
||||
|
||||
/*
|
||||
* BTL base header, stores the tag at a minimum
|
||||
*/
|
||||
@ -655,6 +728,43 @@ typedef struct mca_btl_base_descriptor_t* (*mca_btl_base_module_prepare_fn_t)(
|
||||
uint32_t flags
|
||||
);
|
||||
|
||||
/**
|
||||
* @brief Register a memory region for put/get/atomic operations.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information (or NULL for all endpoints)
|
||||
* @param base (IN) Pointer to start of region
|
||||
* @param size (IN) Size of region
|
||||
* @param flags (IN) Flags indicating what access to the region is allowed. Valid
|
||||
* values are combinations of the MCA_BTL_REG_FLAG_* registration
|
||||
* flags (for example MCA_BTL_REG_FLAG_REMOTE_WRITE for a btl_put target)
|
||||
*
|
||||
* @returns a memory registration handle valid for both local and remote operations
|
||||
* @returns NULL if the region could not be registered
|
||||
*
|
||||
* This function registers the specified region with the hardware for use with
|
||||
* the btl_put, btl_get, btl_atomic_cas, btl_atomic_op, and btl_atomic_fop
|
||||
* functions. Care should be taken to not hold an excessive number of registrations
|
||||
* as they may use limited system/NIC resources.
|
||||
*/
|
||||
typedef struct mca_btl_base_registration_handle_t *(*mca_btl_base_module_register_mem_fn_t)(
|
||||
struct mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t *endpoint, void *base,
|
||||
size_t size, uint32_t flags);
|
||||
|
||||
/**
|
||||
* @brief Deregister a memory region
|
||||
*
|
||||
* @param btl (IN) BTL module region was registered with
|
||||
* @param handle (IN) BTL registration handle to deregister
|
||||
*
|
||||
* This function deregisters the memory region associated with the specified handle. Care
|
||||
* should be taken to not perform any RDMA or atomic operation on this memory region
|
||||
* after it is deregistered. It is erroneous to specify a memory handle associated with
|
||||
* a remote node.
|
||||
*/
|
||||
typedef int (*mca_btl_base_module_deregister_mem_fn_t)(
|
||||
struct mca_btl_base_module_t* btl, struct mca_btl_base_registration_handle_t *handle);
|
||||
|
||||
/**
|
||||
* Initiate an asynchronous send.
|
||||
* Completion Semantics: the descriptor has been queued for a send operation
|
||||
@ -722,9 +832,12 @@ typedef int (*mca_btl_base_module_sendi_fn_t)(
|
||||
|
||||
/**
|
||||
* Initiate an asynchronous put.
|
||||
* Completion Semantics: the descriptor has been queued for a put operation
|
||||
* the BTL now controls the descriptor until local
|
||||
* completion callback is made on the descriptor
|
||||
* Completion Semantics: if this function returns a 1 then the operation
|
||||
* is complete. a return of OPAL_SUCCESS indicates
|
||||
* the put operation has been queued with the
|
||||
* network. the local_handle can not be deregistered
|
||||
* until all outstanding operations on that handle
|
||||
* have been completed.
|
||||
*
|
||||
* BTLs that do not have the MCA_BTL_FLAGS_RDMA_MATCHED flag set
|
||||
* allow multiple concurrent put operations on the same descriptor.
|
||||
@ -732,19 +845,32 @@ typedef int (*mca_btl_base_module_sendi_fn_t)(
|
||||
* a corresponding prepare_src/dst call for each put operation and
|
||||
* therefore prohibit multiple concurrent put operations.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param descriptor (IN) Description of the data to be transferred
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param local_address (IN) Local address to put from (registered)
|
||||
* @param remote_address (IN) Remote address to put to (registered remotely)
|
||||
* @param local_handle (IN) Registration handle for region containing
|
||||
* (local_address, local_address + size)
|
||||
* @param remote_handle (IN) Remote registration handle for region containing
|
||||
* (remote_address, remote_address + size)
|
||||
* @param size (IN) Number of bytes to put
|
||||
* @param flags (IN) Flags for this put operation
|
||||
* @param cbfunc (IN) Function to call on completion (if queued)
|
||||
* @param cbcontext (IN) Context for the callback
|
||||
* @param cbdata (IN) Data for callback
|
||||
*
|
||||
* @retval OPAL_SUCCESS The descriptor was successfully queued for a put
|
||||
* @retval OPAL_ERROR The descriptor was NOT successfully queued for a put
|
||||
* @retval OPAL_ERR_OUT_OF_RESOURCE Insufficient resources to queue the put
|
||||
* operation. Try again later
|
||||
* @retval OPAL_ERR_NOT_AVAILABLE Put can not be performed due to size or
|
||||
* alignment restrictions.
|
||||
*/
|
||||
|
||||
typedef int (*mca_btl_base_module_put_fn_t)(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
struct mca_btl_base_descriptor_t* descriptor
|
||||
);
|
||||
typedef int (*mca_btl_base_module_put_fn_t) (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint, void *local_address,
|
||||
uint64_t remote_address, struct mca_btl_base_registration_handle_t *local_handle,
|
||||
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
|
||||
|
||||
/**
|
||||
* Initiate an asynchronous get.
|
||||
@ -767,13 +893,47 @@ typedef int (*mca_btl_base_module_put_fn_t)(
|
||||
* @retval OPAL_ERROR The descriptor was NOT successfully queued for a get
|
||||
*
|
||||
*/
|
||||
|
||||
typedef int (*mca_btl_base_module_get_fn_t)(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
struct mca_btl_base_descriptor_t* descriptor
|
||||
);
|
||||
|
||||
/**
|
||||
* Initiate an asynchronous get.
|
||||
* Completion Semantics: if this function returns a 1 then the operation
|
||||
* is complete. a return of OPAL_SUCCESS indicates
|
||||
* the get operation has been queued with the
|
||||
* network. the local_handle can not be deregistered
|
||||
* until all outstanding operations on that handle
|
||||
* have been completed.
|
||||
*
|
||||
* BTLs that do not have the MCA_BTL_FLAGS_RDMA_MATCHED flag set
|
||||
* allow multiple concurrent put operations on the same descriptor.
|
||||
* BTLs that do have the MCA_BTL_FLAGS_RDMA_MATCHED flag set require
|
||||
* a corresponding prepare_src/dst call for each put operation and
|
||||
* therefore prohibit multiple concurrent put operations.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param local_address (IN) Local address to get into (registered)
|
||||
* @param remote_address (IN) Remote address to get from (registered remotely)
|
||||
* @param local_handle (IN) Registration handle for region containing
|
||||
* (local_address, local_address + size)
|
||||
* @param remote_handle (IN) Remote registration handle for region containing
|
||||
* (remote_address, remote_address + size)
|
||||
* @param size (IN) Number of bytes to get
|
||||
* @param flags (IN) Flags for this get operation
|
||||
* @param cbfunc (IN) Function to call on completion (if queued)
|
||||
* @param cbcontext (IN) Context for the callback
|
||||
* @param cbdata (IN) Data for callback
|
||||
*
|
||||
* @retval OPAL_SUCCESS The get operation was successfully queued
|
||||
* @retval OPAL_ERROR The get operation was NOT successfully queued
|
||||
* @retval OPAL_ERR_OUT_OF_RESOURCE Insufficient resources to queue the get
|
||||
* operation. Try again later
|
||||
* @retval OPAL_ERR_NOT_AVAILABLE Get can not be performed due to size or
|
||||
* alignment restrictions.
|
||||
*/
|
||||
typedef int (*mca_btl_base_module_get_fn_t) (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint, void *local_address,
|
||||
uint64_t remote_address, struct mca_btl_base_registration_handle_t *local_handle,
|
||||
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
|
||||
|
||||
/**
|
||||
* Diagnostic dump of btl state.
|
||||
@ -813,7 +973,13 @@ struct mca_btl_base_module_t {
|
||||
uint32_t btl_latency; /**< relative ranking of latency used to prioritize btls */
|
||||
uint32_t btl_bandwidth; /**< bandwidth (Mbytes/sec) supported by each endpoint */
|
||||
uint32_t btl_flags; /**< flags (put/get...) */
|
||||
size_t btl_seg_size; /**< size of a btl segment */
|
||||
size_t btl_registration_handle_size; /**< size of the BTLs registration handles */
|
||||
|
||||
/* One-sided limitations (0 for no alignment, SIZE_MAX for no limit) */
|
||||
size_t btl_get_limit; /**< maximum size supported by the btl_get function */
|
||||
size_t btl_get_alignment; /**< minimum alignment/size needed by btl_get (power of 2) */
|
||||
size_t btl_put_limit; /**< maximum size supported by the btl_put function */
|
||||
size_t btl_put_alignment; /**< minimum alignment/size needed by btl_put (power of 2) */
|
||||
|
||||
/* BTL function table */
|
||||
mca_btl_base_module_add_procs_fn_t btl_add_procs;
|
||||
@ -824,13 +990,16 @@ struct mca_btl_base_module_t {
|
||||
mca_btl_base_module_alloc_fn_t btl_alloc;
|
||||
mca_btl_base_module_free_fn_t btl_free;
|
||||
mca_btl_base_module_prepare_fn_t btl_prepare_src;
|
||||
mca_btl_base_module_prepare_fn_t btl_prepare_dst;
|
||||
mca_btl_base_module_send_fn_t btl_send;
|
||||
mca_btl_base_module_sendi_fn_t btl_sendi;
|
||||
mca_btl_base_module_put_fn_t btl_put;
|
||||
mca_btl_base_module_get_fn_t btl_get;
|
||||
mca_btl_base_module_dump_fn_t btl_dump;
|
||||
|
||||
|
||||
/* new memory registration functions */
|
||||
mca_btl_base_module_register_mem_fn_t btl_register_mem; /**< memory registration function (NULL if not needed) */
|
||||
mca_btl_base_module_deregister_mem_fn_t btl_deregister_mem; /**< memory deregistration function (NULL if not needed) */
|
||||
|
||||
/** the mpool associated with this btl (optional) */
|
||||
mca_mpool_base_module_t* btl_mpool;
|
||||
/** register a default error handler */
|
||||
|
@ -38,13 +38,17 @@
|
||||
#include "btl_self_frag.h"
|
||||
#include "opal/util/proc.h"
|
||||
|
||||
static int mca_btl_self_put (struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
struct mca_btl_base_descriptor_t* des);
|
||||
int mca_btl_self_put (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint, void *local_address,
|
||||
uint64_t remote_address, struct mca_btl_base_registration_handle_t *local_handle,
|
||||
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
|
||||
|
||||
static int mca_btl_self_get (struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
struct mca_btl_base_descriptor_t* des);
|
||||
int mca_btl_self_get (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint, void *local_address,
|
||||
uint64_t remote_address, struct mca_btl_base_registration_handle_t *local_handle,
|
||||
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
|
||||
|
||||
mca_btl_base_module_t mca_btl_self = {
|
||||
.btl_component = &mca_btl_self_component.super,
|
||||
@ -54,7 +58,6 @@ mca_btl_base_module_t mca_btl_self = {
|
||||
.btl_alloc = mca_btl_self_alloc,
|
||||
.btl_free = mca_btl_self_free,
|
||||
.btl_prepare_src = mca_btl_self_prepare_src,
|
||||
.btl_prepare_dst = mca_btl_self_prepare_dst,
|
||||
.btl_send = mca_btl_self_send,
|
||||
.btl_put = mca_btl_self_put,
|
||||
.btl_get = mca_btl_self_get,
|
||||
@ -236,39 +239,6 @@ mca_btl_self_prepare_src( struct mca_btl_base_module_t* btl,
|
||||
|
||||
return &frag->base;
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare data for receive.
|
||||
*/
|
||||
struct mca_btl_base_descriptor_t*
|
||||
mca_btl_self_prepare_dst( struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
mca_mpool_base_registration_t* registration,
|
||||
struct opal_convertor_t* convertor,
|
||||
uint8_t order,
|
||||
size_t reserve,
|
||||
size_t* size,
|
||||
uint32_t flags )
|
||||
{
|
||||
mca_btl_self_frag_t* frag;
|
||||
size_t max_data = *size;
|
||||
void *ptr;
|
||||
|
||||
MCA_BTL_SELF_FRAG_ALLOC_RDMA(frag);
|
||||
if(OPAL_UNLIKELY(NULL == frag)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* setup descriptor to point directly to user buffer */
|
||||
opal_convertor_get_current_pointer( convertor, &ptr );
|
||||
frag->segment.seg_addr.lval = (uint64_t)(uintptr_t) ptr;
|
||||
|
||||
frag->segment.seg_len = reserve + max_data;
|
||||
frag->base.des_local = &frag->segment;
|
||||
frag->base.des_local_count = 1;
|
||||
frag->base.des_flags = flags;
|
||||
return &frag->base;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initiate a send to the peer.
|
||||
@ -305,100 +275,31 @@ int mca_btl_self_send( struct mca_btl_base_module_t* btl,
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initiate a put to the peer.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param peer (IN) BTL peer addressing
|
||||
*/
|
||||
|
||||
static int mca_btl_self_rdma( struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
struct mca_btl_base_descriptor_t* des,
|
||||
mca_btl_base_segment_t* src, size_t src_cnt,
|
||||
mca_btl_base_segment_t* dst, size_t dst_cnt)
|
||||
int mca_btl_self_put (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint, void *local_address,
|
||||
uint64_t remote_address, struct mca_btl_base_registration_handle_t *local_handle,
|
||||
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
|
||||
{
|
||||
unsigned char* src_addr = (unsigned char *)(uintptr_t) src->seg_addr.lval;
|
||||
size_t src_len = src->seg_len;
|
||||
unsigned char* dst_addr = (unsigned char *)(uintptr_t) dst->seg_addr.lval;
|
||||
size_t dst_len = dst->seg_len;
|
||||
int btl_ownership = (des->des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
|
||||
memcpy ((void *)(intptr_t) remote_address, local_address, size);
|
||||
|
||||
while(src_len && dst_len) {
|
||||
cbfunc (btl, endpoint, local_address, NULL, cbcontext, cbdata, OPAL_SUCCESS);
|
||||
|
||||
if(src_len == dst_len) {
|
||||
memcpy(dst_addr, src_addr, src_len);
|
||||
|
||||
/* advance src */
|
||||
if(--src_cnt != 0) {
|
||||
src++;
|
||||
src_addr = (unsigned char*)src->seg_addr.pval;
|
||||
src_len = src->seg_len;
|
||||
} else {
|
||||
src_len = 0;
|
||||
}
|
||||
|
||||
/* advance dst */
|
||||
if(--dst_cnt != 0) {
|
||||
dst++;
|
||||
dst_addr = (unsigned char*)dst->seg_addr.pval;
|
||||
dst_len = dst->seg_len;
|
||||
} else {
|
||||
dst_len = 0;
|
||||
}
|
||||
|
||||
} else {
|
||||
size_t bytes = src_len < dst_len ? src_len : dst_len;
|
||||
memcpy(dst_addr, src_addr, bytes);
|
||||
|
||||
/* advance src */
|
||||
src_len -= bytes;
|
||||
if(src_len == 0) {
|
||||
if(--src_cnt != 0) {
|
||||
src++;
|
||||
src_addr = (unsigned char*)src->seg_addr.pval;
|
||||
src_len = src->seg_len;
|
||||
}
|
||||
} else {
|
||||
src_addr += bytes;
|
||||
}
|
||||
|
||||
/* advance dst */
|
||||
dst_len -= bytes;
|
||||
if(dst_len == 0) {
|
||||
if(--dst_cnt != 0) {
|
||||
dst++;
|
||||
dst_addr = (unsigned char*)src->seg_addr.pval;
|
||||
dst_len = src->seg_len;
|
||||
}
|
||||
} else {
|
||||
dst_addr += bytes;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* rdma completion */
|
||||
des->des_cbfunc( btl, endpoint, des, OPAL_SUCCESS );
|
||||
if( btl_ownership ) {
|
||||
mca_btl_self_free( btl, des );
|
||||
}
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static int mca_btl_self_put (struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
struct mca_btl_base_descriptor_t* des)
|
||||
int mca_btl_self_get (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint, void *local_address,
|
||||
uint64_t remote_address, struct mca_btl_base_registration_handle_t *local_handle,
|
||||
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
|
||||
{
|
||||
return mca_btl_self_rdma (btl, endpoint, des, des->des_local, des->des_local_count,
|
||||
des->des_remote, des->des_remote_count);
|
||||
}
|
||||
memcpy (local_address, (void *)(intptr_t) remote_address, size);
|
||||
|
||||
static int mca_btl_self_get (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct mca_btl_base_descriptor_t *des)
|
||||
{
|
||||
return mca_btl_self_rdma (btl, endpoint, des, des->des_remote, des->des_remote_count,
|
||||
des->des_local, des->des_local_count);
|
||||
cbfunc (btl, endpoint, local_address, NULL, cbcontext, cbdata, OPAL_SUCCESS);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_btl_self_ft_event(int state) {
|
||||
|
@ -99,7 +99,6 @@ static int mca_btl_self_component_register(void)
|
||||
mca_btl_self.btl_rdma_pipeline_frag_size = INT_MAX;
|
||||
mca_btl_self.btl_min_rdma_pipeline_size = 0;
|
||||
mca_btl_self.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND_INPLACE;
|
||||
mca_btl_self.btl_seg_size = sizeof (mca_btl_base_segment_t);
|
||||
mca_btl_self.btl_bandwidth = 100;
|
||||
mca_btl_self.btl_latency = 0;
|
||||
mca_btl_base_param_register(&mca_btl_self_component.super.btl_version,
|
||||
|
@ -251,7 +251,6 @@ static int sm_register(void)
|
||||
mca_btl_sm.super.btl_rdma_pipeline_frag_size = 64*1024;
|
||||
mca_btl_sm.super.btl_min_rdma_pipeline_size = 64*1024;
|
||||
mca_btl_sm.super.btl_flags = MCA_BTL_FLAGS_SEND;
|
||||
mca_btl_sm.super.btl_seg_size = sizeof (mca_btl_sm_segment_t);
|
||||
mca_btl_sm.super.btl_bandwidth = 9000; /* Mbs */
|
||||
mca_btl_sm.super.btl_latency = 1; /* Microsecs */
|
||||
|
||||
|
@ -33,6 +33,7 @@
|
||||
#include "opal/mca/btl/base/btl_base_error.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
#include "opal/class/ompi_free_list.h"
|
||||
#include "opal/class/opal_free_list.h"
|
||||
#include "opal/mca/common/ugni/common_ugni.h"
|
||||
|
||||
#include <errno.h>
|
||||
@ -80,6 +81,11 @@ typedef struct mca_btl_ugni_module_t {
|
||||
opal_mutex_t eager_get_pending_lock;
|
||||
opal_list_t eager_get_pending;
|
||||
|
||||
opal_mutex_t pending_descriptors_lock;
|
||||
opal_list_t pending_descriptors;
|
||||
|
||||
ompi_free_list_t post_descriptors;
|
||||
|
||||
mca_mpool_base_module_t *smsg_mpool;
|
||||
ompi_free_list_t smsg_mboxes;
|
||||
|
||||
@ -143,8 +149,6 @@ typedef struct mca_btl_ugni_component_t {
|
||||
|
||||
/* After this message size switch to BTE protocols */
|
||||
size_t ugni_fma_limit;
|
||||
/* Switch to put when trying to GET at or above this size */
|
||||
size_t ugni_get_limit;
|
||||
/* Switch to get when sending above this size */
|
||||
size_t ugni_smsg_limit;
|
||||
|
||||
@ -269,10 +273,13 @@ mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param descriptor (IN) Description of the data to be transferred
|
||||
*/
|
||||
int
|
||||
mca_btl_ugni_get (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct mca_btl_base_descriptor_t *des);
|
||||
int mca_btl_ugni_get (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
void *local_address, uint64_t remote_address,
|
||||
struct mca_btl_base_registration_handle_t *local_handle,
|
||||
struct mca_btl_base_registration_handle_t *remote_handle,
|
||||
size_t size, int flags, mca_btl_base_rdma_completion_fn_t cbfunc,
|
||||
void *cbcontext, void *cbdata);
|
||||
|
||||
/**
|
||||
* Initiate a put operation.
|
||||
@ -283,10 +290,13 @@ mca_btl_ugni_get (struct mca_btl_base_module_t *btl,
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param descriptor (IN) Description of the data to be transferred
|
||||
*/
|
||||
int
|
||||
mca_btl_ugni_put (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct mca_btl_base_descriptor_t *des);
|
||||
int mca_btl_ugni_put (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
void *local_address, uint64_t remote_address,
|
||||
struct mca_btl_base_registration_handle_t *local_handle,
|
||||
struct mca_btl_base_registration_handle_t *remote_handle,
|
||||
size_t size, int flags, mca_btl_base_rdma_completion_fn_t cbfunc,
|
||||
void *cbcontext, void *cbdata);
|
||||
|
||||
int mca_btl_ugni_progress_send_wait_list (struct mca_btl_base_endpoint_t *endpoint);
|
||||
|
||||
@ -295,9 +305,14 @@ mca_btl_ugni_alloc(struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
uint8_t order, size_t size, uint32_t flags);
|
||||
|
||||
struct mca_btl_base_registration_handle_t {
|
||||
/** uGNI memory handle */
|
||||
gni_mem_handle_t gni_handle;
|
||||
};
|
||||
|
||||
typedef struct mca_btl_ugni_reg_t {
|
||||
mca_mpool_base_registration_t base;
|
||||
gni_mem_handle_t memory_hdl;
|
||||
mca_btl_base_registration_handle_t handle;
|
||||
} mca_btl_ugni_reg_t;
|
||||
|
||||
/* Global structures */
|
||||
|
@ -188,7 +188,7 @@ static int ugni_reg_rdma_mem (void *reg_data, void *base, size_t size,
|
||||
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
||||
rc = GNI_MemRegister (ugni_module->device->dev_handle, (uint64_t) base,
|
||||
size, NULL, GNI_MEM_READWRITE | GNI_MEM_RELAXED_PI_ORDERING,
|
||||
-1, &(ugni_reg->memory_hdl));
|
||||
-1, &(ugni_reg->handle.gni_handle));
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
@ -211,7 +211,7 @@ static int ugni_reg_smsg_mem (void *reg_data, void *base, size_t size,
|
||||
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
||||
rc = GNI_MemRegister (ugni_module->device->dev_handle, (uint64_t) base,
|
||||
size, ugni_module->smsg_remote_cq, GNI_MEM_READWRITE, -1,
|
||||
&(ugni_reg->memory_hdl));
|
||||
&(ugni_reg->handle.gni_handle));
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
return opal_common_rc_ugni_to_opal (rc);
|
||||
}
|
||||
@ -224,7 +224,7 @@ ugni_dereg_mem (void *reg_data, mca_mpool_base_registration_t *reg)
|
||||
gni_return_t rc;
|
||||
|
||||
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
||||
rc = GNI_MemDeregister (ugni_module->device->dev_handle, &ugni_reg->memory_hdl);
|
||||
rc = GNI_MemDeregister (ugni_module->device->dev_handle, &ugni_reg->handle.gni_handle);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
return OPAL_ERROR;
|
||||
@ -401,6 +401,15 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = ompi_free_list_init_new (&ugni_module->post_descriptors,
|
||||
sizeof (mca_btl_ugni_post_descriptor_t),
|
||||
8, OBJ_CLASS(mca_btl_ugni_post_descriptor_t),
|
||||
0, 0, 0, -1, 256, NULL);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
BTL_ERROR(("error creating post descriptor free list"));
|
||||
return rc;
|
||||
}
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -52,6 +52,7 @@ static int
|
||||
btl_ugni_component_register(void)
|
||||
{
|
||||
mca_base_var_enum_t *new_enum;
|
||||
gni_nic_device_t device_type;
|
||||
int rc;
|
||||
|
||||
(void) mca_base_var_group_component_register(&mca_btl_ugni_component.super.btl_version,
|
||||
@ -139,15 +140,6 @@ btl_ugni_component_register(void)
|
||||
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&mca_btl_ugni_component.ugni_fma_limit);
|
||||
|
||||
mca_btl_ugni_component.ugni_get_limit = 1 * 1024 * 1024;
|
||||
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
||||
"get_limit", "Maximum size message that "
|
||||
"will be sent using a get protocol "
|
||||
"(default 1M)", MCA_BASE_VAR_TYPE_INT,
|
||||
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&mca_btl_ugni_component.ugni_get_limit);
|
||||
|
||||
mca_btl_ugni_component.rdma_max_retries = 16;
|
||||
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
||||
"rdma_max_retries", NULL, MCA_BASE_VAR_TYPE_INT,
|
||||
@ -212,13 +204,24 @@ btl_ugni_component_register(void)
|
||||
mca_btl_ugni_module.super.btl_max_send_size = 8 * 1024;
|
||||
mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = 8 * 1024;
|
||||
|
||||
mca_btl_ugni_module.super.btl_get_limit = 1 * 1024 * 1024;
|
||||
|
||||
/* determine if there are get alignment restrictions */
|
||||
GNI_GetDeviceType (&device_type);
|
||||
|
||||
if (GNI_DEVICE_GEMINI == device_type) {
|
||||
mca_btl_ugni_module.super.btl_get_alignment = 4;
|
||||
} else {
|
||||
mca_btl_ugni_module.super.btl_get_alignment = 0;
|
||||
}
|
||||
|
||||
/* threshold for put */
|
||||
mca_btl_ugni_module.super.btl_min_rdma_pipeline_size = 8 * 1024;
|
||||
|
||||
mca_btl_ugni_module.super.btl_flags = MCA_BTL_FLAGS_SEND |
|
||||
MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE;
|
||||
|
||||
mca_btl_ugni_module.super.btl_seg_size = sizeof (mca_btl_ugni_segment_t);
|
||||
mca_btl_ugni_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);
|
||||
|
||||
mca_btl_ugni_module.super.btl_bandwidth = 40000; /* Mbs */
|
||||
mca_btl_ugni_module.super.btl_latency = 2; /* Microsecs */
|
||||
@ -425,89 +428,107 @@ mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
|
||||
return count;
|
||||
}
|
||||
|
||||
static inline int
|
||||
mca_btl_ugni_progress_rdma (mca_btl_ugni_module_t *ugni_module, int which_cq)
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
static inline void btl_ugni_dump_post_desc (mca_btl_ugni_post_descriptor_t *desc)
|
||||
{
|
||||
opal_common_ugni_post_desc_t *desc;
|
||||
mca_btl_ugni_base_frag_t *frag;
|
||||
gni_cq_entry_t event_data = 0;
|
||||
uint32_t recoverable = 1;
|
||||
gni_return_t rc;
|
||||
gni_cq_handle_t the_cq;
|
||||
|
||||
the_cq = (which_cq == 0) ? ugni_module->rdma_local_cq : ugni_module->rdma_local_irq_cq;
|
||||
fprintf (stderr, "desc->desc.base.post_id = %" PRIx64 "\n", desc->desc.base.post_id);
|
||||
fprintf (stderr, "desc->desc.base.status = %" PRIx64 "\n", desc->desc.base.status);
|
||||
fprintf (stderr, "desc->desc.base.cq_mode_complete = %hu\n", desc->desc.base.cq_mode_complete);
|
||||
fprintf (stderr, "desc->desc.base.type = %d\n", desc->desc.base.type);
|
||||
fprintf (stderr, "desc->desc.base.cq_mode = %hu\n", desc->desc.base.cq_mode);
|
||||
fprintf (stderr, "desc->desc.base.dlvr_mode = %hu\n", desc->desc.base.dlvr_mode);
|
||||
fprintf (stderr, "desc->desc.base.local_addr = %" PRIx64 "\n", desc->desc.base.local_addr);
|
||||
fprintf (stderr, "desc->desc.base.local_mem_hndl = {%" PRIx64 ", %" PRIx64 "}\n", desc->desc.base.local_mem_hndl.qword1,
|
||||
desc->desc.base.local_mem_hndl.qword2);
|
||||
fprintf (stderr, "desc->desc.base.remote_addr = %" PRIx64 "\n", desc->desc.base.remote_addr);
|
||||
fprintf (stderr, "desc->desc.base.remote_mem_hndl = {%" PRIx64 ", %" PRIx64 "}\n", desc->desc.base.remote_mem_hndl.qword1,
|
||||
desc->desc.base.remote_mem_hndl.qword2);
|
||||
fprintf (stderr, "desc->desc.base.length = %" PRIu64 "\n", desc->desc.base.length);
|
||||
fprintf (stderr, "desc->desc.base.rdma_mode = %hu\n", desc->desc.base.rdma_mode);
|
||||
fprintf (stderr, "desc->desc.base.amo_cmd = %d\n", desc->desc.base.amo_cmd);
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline int mca_btl_ugni_progress_rdma (mca_btl_ugni_module_t *ugni_module, int which_cq)
|
||||
{
|
||||
mca_btl_ugni_post_descriptor_t *post_desc = NULL;
|
||||
gni_cq_entry_t event_data = 0;
|
||||
gni_post_descriptor_t *desc;
|
||||
uint32_t recoverable = 1;
|
||||
gni_return_t grc;
|
||||
|
||||
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
||||
rc = GNI_CqGetEvent (the_cq, &event_data);
|
||||
if (GNI_RC_NOT_DONE == rc) {
|
||||
grc = GNI_CqGetEvent (ugni_module->rdma_local_cq, &event_data);
|
||||
if (GNI_RC_NOT_DONE == grc) {
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY((GNI_RC_SUCCESS != rc && !event_data) || GNI_CQ_OVERRUN(event_data))) {
|
||||
if (OPAL_UNLIKELY((GNI_RC_SUCCESS != grc && !event_data) || GNI_CQ_OVERRUN(event_data))) {
|
||||
/* TODO -- need to handle overrun -- how do we do this without an event?
|
||||
will the event eventually come back? Ask Cray */
|
||||
BTL_ERROR(("unhandled post error! ugni rc = %d %s", rc,gni_err_str[rc]));
|
||||
BTL_ERROR(("unhandled post error! ugni rc = %d %s", grc, gni_err_str[grc]));
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
return opal_common_rc_ugni_to_opal (rc);
|
||||
|
||||
return opal_common_rc_ugni_to_opal (grc);
|
||||
}
|
||||
|
||||
rc = GNI_GetCompleted (the_cq, event_data, (gni_post_descriptor_t **) &desc);
|
||||
grc = GNI_GetCompleted (ugni_module->rdma_local_cq, event_data, &desc);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc && GNI_RC_TRANSACTION_ERROR != rc)) {
|
||||
BTL_ERROR(("Error in GNI_GetComplete %s", gni_err_str[rc]));
|
||||
return opal_common_rc_ugni_to_opal (rc);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc && GNI_RC_TRANSACTION_ERROR != grc)) {
|
||||
BTL_ERROR(("Error in GNI_GetComplete %s", gni_err_str[grc]));
|
||||
return opal_common_rc_ugni_to_opal (grc);
|
||||
}
|
||||
|
||||
frag = MCA_BTL_UGNI_DESC_TO_FRAG(desc);
|
||||
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc || !GNI_CQ_STATUS_OK(event_data))) {
|
||||
char buffer[1024];
|
||||
post_desc = MCA_BTL_UGNI_DESC_TO_PDESC(desc);
|
||||
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc || !GNI_CQ_STATUS_OK(event_data))) {
|
||||
(void) GNI_CqErrorRecoverable (event_data, &recoverable);
|
||||
GNI_CqErrorStr(event_data,buffer,sizeof(buffer));
|
||||
|
||||
if (OPAL_UNLIKELY(++frag->post_desc.tries >= mca_btl_ugni_component.rdma_max_retries ||
|
||||
if (OPAL_UNLIKELY(++post_desc->desc.tries >= mca_btl_ugni_component.rdma_max_retries ||
|
||||
!recoverable)) {
|
||||
char char_buffer[1024];
|
||||
GNI_CqErrorStr (event_data, char_buffer, 1024);
|
||||
/* give up */
|
||||
BTL_ERROR(("giving up on frag %p type %d CQE error %s", (void *) frag, frag->post_desc.base.type, buffer));
|
||||
mca_btl_ugni_frag_complete (frag, OPAL_ERROR);
|
||||
BTL_ERROR(("giving up on desciptor %p, recoverable %d: %s", (void *) post_desc,
|
||||
recoverable, char_buffer));
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
btl_ugni_dump_post_desc (post_desc);
|
||||
#endif
|
||||
mca_btl_ugni_post_desc_complete (ugni_module, post_desc, OPAL_ERROR);
|
||||
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
|
||||
/* repost transaction */
|
||||
mca_btl_ugni_repost (frag);
|
||||
mca_btl_ugni_repost (ugni_module, post_desc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("RDMA/FMA complete for frag %p", (void *) frag));
|
||||
|
||||
mca_btl_ugni_frag_complete (frag, opal_common_rc_ugni_to_opal (rc));
|
||||
mca_btl_ugni_post_desc_complete (ugni_module, post_desc, opal_common_rc_ugni_to_opal (grc));
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline int
|
||||
mca_btl_ugni_retry_failed (mca_btl_ugni_module_t *ugni_module)
|
||||
mca_btl_ugni_post_pending (mca_btl_ugni_module_t *ugni_module)
|
||||
{
|
||||
int count = opal_list_get_size (&ugni_module->failed_frags);
|
||||
int count = opal_list_get_size (&ugni_module->pending_descriptors);
|
||||
int i;
|
||||
|
||||
for (i = 0 ; i < count ; ++i) {
|
||||
OPAL_THREAD_LOCK(&ugni_module->failed_frags_lock);
|
||||
mca_btl_ugni_base_frag_t *frag =
|
||||
(mca_btl_ugni_base_frag_t *) opal_list_remove_first (&ugni_module->failed_frags);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->failed_frags_lock);
|
||||
if (NULL == frag) {
|
||||
OPAL_THREAD_LOCK(&ugni_module->pending_descriptors_lock);
|
||||
mca_btl_ugni_post_descriptor_t *post_desc =
|
||||
(mca_btl_ugni_post_descriptor_t *) opal_list_remove_first (&ugni_module->pending_descriptors);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->pending_descriptors_lock);
|
||||
|
||||
if (OPAL_SUCCESS != mca_btl_ugni_repost (ugni_module, post_desc)) {
|
||||
break;
|
||||
}
|
||||
|
||||
mca_btl_ugni_repost (frag);
|
||||
}
|
||||
|
||||
return count;
|
||||
return i;
|
||||
}
|
||||
|
||||
static inline int
|
||||
@ -557,7 +578,6 @@ static int mca_btl_ugni_component_progress (void)
|
||||
for (i = 0 ; i < mca_btl_ugni_component.ugni_num_btls ; ++i) {
|
||||
ugni_module = mca_btl_ugni_component.modules + i;
|
||||
|
||||
mca_btl_ugni_retry_failed (ugni_module);
|
||||
mca_btl_ugni_progress_wait_list (ugni_module);
|
||||
|
||||
count += mca_btl_ugni_progress_datagram (ugni_module);
|
||||
@ -565,6 +585,8 @@ static int mca_btl_ugni_component_progress (void)
|
||||
count += mca_btl_ugni_progress_remote_smsg (ugni_module);
|
||||
count += mca_btl_ugni_progress_rdma (ugni_module, 0);
|
||||
|
||||
/* post pending after progressing rdma */
|
||||
mca_btl_ugni_post_pending (ugni_module);
|
||||
}
|
||||
|
||||
return count;
|
||||
|
@ -16,7 +16,7 @@
|
||||
static inline void mca_btl_ugni_base_frag_constructor (mca_btl_ugni_base_frag_t *frag)
|
||||
{
|
||||
memset ((char *) frag + sizeof (frag->base), 0, sizeof (*frag) - sizeof (frag->base));
|
||||
frag->segments[0].base.seg_addr.pval = frag->base.super.ptr;
|
||||
frag->segments[0].seg_addr.pval = frag->base.super.ptr;
|
||||
}
|
||||
|
||||
static inline void mca_btl_ugni_eager_frag_constructor (mca_btl_ugni_base_frag_t *frag)
|
||||
@ -26,7 +26,7 @@ static inline void mca_btl_ugni_eager_frag_constructor (mca_btl_ugni_base_frag_t
|
||||
|
||||
mca_btl_ugni_base_frag_constructor (frag);
|
||||
|
||||
frag->segments[0].memory_handle = reg->memory_hdl;
|
||||
frag->memory_handle = reg->handle;
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_btl_ugni_smsg_frag_t, mca_btl_base_descriptor_t,
|
||||
@ -38,6 +38,9 @@ OBJ_CLASS_INSTANCE(mca_btl_ugni_rdma_frag_t, mca_btl_base_descriptor_t,
|
||||
OBJ_CLASS_INSTANCE(mca_btl_ugni_eager_frag_t, mca_btl_base_descriptor_t,
|
||||
mca_btl_ugni_eager_frag_constructor, NULL);
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_btl_ugni_post_descriptor_t, ompi_free_list_item_t,
|
||||
NULL, NULL);
|
||||
|
||||
void mca_btl_ugni_frag_init (mca_btl_ugni_base_frag_t *frag, mca_btl_ugni_module_t *ugni_module)
|
||||
{
|
||||
frag->msg_id = opal_pointer_array_add (&ugni_module->pending_smsg_frags_bb, (void *) frag);
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2013 The University of Tennessee and The University
|
||||
@ -19,13 +19,6 @@
|
||||
#include "btl_ugni.h"
|
||||
#include "btl_ugni_endpoint.h"
|
||||
|
||||
typedef struct mca_btl_ugni_segment_t {
|
||||
mca_btl_base_segment_t base;
|
||||
gni_mem_handle_t memory_handle;
|
||||
uint8_t extra_bytes[3];
|
||||
uint8_t extra_byte_count;
|
||||
} mca_btl_ugni_segment_t;
|
||||
|
||||
typedef struct mca_btl_ugni_send_frag_hdr_t {
|
||||
uint32_t lag;
|
||||
} mca_btl_ugni_send_frag_hdr_t;
|
||||
@ -41,7 +34,9 @@ typedef struct mca_btl_ugni_rdma_frag_hdr_t {
|
||||
|
||||
typedef struct mca_btl_ugni_eager_frag_hdr_t {
|
||||
mca_btl_ugni_send_frag_hdr_t send;
|
||||
mca_btl_ugni_segment_t src_seg;
|
||||
uint32_t size;
|
||||
uint64_t address;
|
||||
mca_btl_base_registration_handle_t memory_handle;
|
||||
void *ctx;
|
||||
} mca_btl_ugni_eager_frag_hdr_t;
|
||||
|
||||
@ -59,28 +54,28 @@ typedef union mca_btl_ugni_frag_hdr_t {
|
||||
} mca_btl_ugni_frag_hdr_t;
|
||||
|
||||
enum {
|
||||
MCA_BTL_UGNI_FRAG_BUFFERED = 1, /* frag data is buffered */
|
||||
MCA_BTL_UGNI_FRAG_COMPLETE = 2, /* smsg complete for frag */
|
||||
MCA_BTL_UGNI_FRAG_EAGER = 4, /* eager get frag */
|
||||
MCA_BTL_UGNI_FRAG_IGNORE = 8, /* ignore local smsg completion */
|
||||
MCA_BTL_UGNI_FRAG_SMSG_COMPLETE = 16 /* SMSG has completed for this message */
|
||||
MCA_BTL_UGNI_FRAG_BUFFERED = 1, /* frag data is buffered */
|
||||
MCA_BTL_UGNI_FRAG_COMPLETE = 2, /* smsg complete for frag */
|
||||
MCA_BTL_UGNI_FRAG_EAGER = 4, /* eager get frag */
|
||||
MCA_BTL_UGNI_FRAG_IGNORE = 8, /* ignore local smsg completion */
|
||||
MCA_BTL_UGNI_FRAG_SMSG_COMPLETE = 16, /* SMSG has completed for this message */
|
||||
MCA_BTL_UGNI_FRAG_RESPONSE = 32,
|
||||
};
|
||||
|
||||
struct mca_btl_ugni_base_frag_t;
|
||||
|
||||
typedef void (*frag_cb_t) (struct mca_btl_ugni_base_frag_t *, int);
|
||||
|
||||
typedef struct mca_btl_ugni_base_frag_t {
|
||||
mca_btl_base_descriptor_t base;
|
||||
uint32_t msg_id;
|
||||
uint16_t hdr_size;
|
||||
uint16_t flags;
|
||||
mca_btl_ugni_frag_hdr_t hdr;
|
||||
mca_btl_ugni_segment_t segments[2];
|
||||
mca_btl_base_segment_t segments[2];
|
||||
opal_common_ugni_post_desc_t post_desc;
|
||||
mca_btl_base_endpoint_t *endpoint;
|
||||
mca_btl_ugni_reg_t *registration;
|
||||
ompi_free_list_t *my_list;
|
||||
mca_btl_base_registration_handle_t memory_handle;
|
||||
} mca_btl_ugni_base_frag_t;
|
||||
|
||||
typedef struct mca_btl_ugni_base_frag_t mca_btl_ugni_smsg_frag_t;
|
||||
@ -90,6 +85,56 @@ typedef struct mca_btl_ugni_base_frag_t mca_btl_ugni_eager_frag_t;
|
||||
#define MCA_BTL_UGNI_DESC_TO_FRAG(desc) \
|
||||
((mca_btl_ugni_base_frag_t *)((uintptr_t) (desc) - offsetof (mca_btl_ugni_base_frag_t, post_desc)))
|
||||
|
||||
/**
 * Per-operation state for a posted operation that completes through a callback.
 *
 * super        - free list item base; descriptors are taken from and returned to
 *                the module's post_descriptors free list
 * desc         - post descriptor; MCA_BTL_UGNI_DESC_TO_PDESC maps a pointer to this
 *                member back to the enclosing structure
 * endpoint     - endpoint recorded at allocation and passed to cbfunc
 * local_handle - local registration handle recorded at allocation and passed to cbfunc
 * cbfunc       - completion callback invoked by mca_btl_ugni_post_desc_complete
 * cbdata       - opaque value passed to cbfunc as its data argument
 * ctx          - opaque value passed to cbfunc as its context argument
 */
typedef struct mca_btl_ugni_post_descriptor_t {
|
||||
ompi_free_list_item_t super;
|
||||
opal_common_ugni_post_desc_t desc;
|
||||
mca_btl_base_endpoint_t *endpoint;
|
||||
mca_btl_base_registration_handle_t *local_handle;
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc;
|
||||
void *cbdata;
|
||||
void *ctx;
|
||||
} mca_btl_ugni_post_descriptor_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_btl_ugni_post_descriptor_t);
|
||||
|
||||
#define MCA_BTL_UGNI_DESC_TO_PDESC(desc) \
|
||||
((mca_btl_ugni_post_descriptor_t *)((uintptr_t) (desc) - offsetof (mca_btl_ugni_post_descriptor_t, desc)))
|
||||
|
||||
static inline void mca_btl_ugni_alloc_post_descriptor (mca_btl_base_endpoint_t *endpoint, mca_btl_base_registration_handle_t *local_handle,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata,
|
||||
mca_btl_ugni_post_descriptor_t **desc)
|
||||
{
|
||||
ompi_free_list_item_t *item = NULL;
|
||||
|
||||
OMPI_FREE_LIST_GET_MT(&endpoint->btl->post_descriptors, item);
|
||||
*desc = (mca_btl_ugni_post_descriptor_t *) item;
|
||||
if (NULL != item) {
|
||||
(*desc)->cbfunc = cbfunc;
|
||||
(*desc)->ctx = cbcontext;
|
||||
(*desc)->cbdata = cbdata;
|
||||
(*desc)->local_handle = local_handle;
|
||||
(*desc)->endpoint = endpoint;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void mca_btl_ugni_return_post_descriptor (mca_btl_ugni_module_t *module,
|
||||
mca_btl_ugni_post_descriptor_t *desc)
|
||||
{
|
||||
OMPI_FREE_LIST_RETURN_MT(&module->post_descriptors, &desc->super);
|
||||
}
|
||||
|
||||
/**
 * Finish a posted operation: notify the owner through the stored callback and
 * recycle the post descriptor.
 *
 * The callback receives the module, the stored endpoint, the local address taken
 * from the post descriptor, the stored local handle, the stored context and data
 * values, and the completion status.
 *
 * @param module (IN) module that owns the descriptor's free list
 * @param desc   (IN) completed descriptor; returned to the free list before this
 *                    function returns, so it must not be touched afterwards
 * @param rc     (IN) completion status forwarded to the callback
 */
static inline void mca_btl_ugni_post_desc_complete (mca_btl_ugni_module_t *module, mca_btl_ugni_post_descriptor_t *desc, int rc)
{
    BTL_VERBOSE(("RDMA/FMA/ATOMIC operation complete for post descriptor %p. rc = %d", (void *) desc, rc));

    /* a descriptor posted without a completion callback must still be recycled
     * rather than crash on a NULL function pointer */
    if (NULL != desc->cbfunc) {
        /* call the user's callback function */
        desc->cbfunc (&module->super, desc->endpoint, (void *)(intptr_t) desc->desc.base.local_addr,
                      desc->local_handle, desc->ctx, desc->cbdata, rc);
    }

    /* the descriptor is no longer needed */
    mca_btl_ugni_return_post_descriptor (module, desc);
}
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_btl_ugni_smsg_frag_t);
|
||||
OBJ_CLASS_DECLARATION(mca_btl_ugni_rdma_frag_t);
|
||||
OBJ_CLASS_DECLARATION(mca_btl_ugni_eager_frag_t);
|
||||
|
@ -13,44 +13,34 @@
|
||||
#include "btl_ugni_rdma.h"
|
||||
#include "btl_ugni_smsg.h"
|
||||
|
||||
/**
|
||||
* Initiate a get operation.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param descriptor (IN) Description of the data to be transferred
|
||||
*/
|
||||
int mca_btl_ugni_get (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct mca_btl_base_descriptor_t *des) {
|
||||
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) des;
|
||||
mca_btl_ugni_segment_t *src_seg = (mca_btl_ugni_segment_t *) des->des_remote;
|
||||
mca_btl_ugni_segment_t *dst_seg = (mca_btl_ugni_segment_t *) des->des_local;
|
||||
size_t size = src_seg->base.seg_len - src_seg->extra_byte_count;
|
||||
void *local_address, uint64_t remote_address,
|
||||
struct mca_btl_base_registration_handle_t *local_handle,
|
||||
struct mca_btl_base_registration_handle_t *remote_handle,
|
||||
size_t size, int flags, mca_btl_base_rdma_completion_fn_t cbfunc,
|
||||
void *cbcontext, void *cbdata)
|
||||
{
|
||||
bool check;
|
||||
|
||||
BTL_VERBOSE(("Using RDMA/FMA Get"));
|
||||
/* Check if the get is aligned/sized on a multiple of 4 */
|
||||
check = !!((remote_address | (uint64_t)(intptr_t) local_address | size) & (mca_btl_ugni_module.super.btl_get_alignment - 1));
|
||||
|
||||
if (OPAL_UNLIKELY(check || size > mca_btl_ugni_module.super.btl_get_limit)) {
|
||||
BTL_VERBOSE(("RDMA/FMA Get not available due to size or alignment restrictions"));
|
||||
|
||||
/* notify the caller that get is not available */
|
||||
return OPAL_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("Using RDMA/FMA Get from local address %p to remote address %" PRIx64,
|
||||
local_address, remote_address));
|
||||
|
||||
/* cause endpoint to bind if it isn't already (bind is sufficient for rdma) */
|
||||
(void) mca_btl_ugni_check_endpoint_state(endpoint);
|
||||
|
||||
/* Check if the get is aligned/sized on a multiple of 4 */
|
||||
check = !!((des->des_remote->seg_addr.lval | des->des_local->seg_addr.lval | size) & 3);
|
||||
|
||||
if (OPAL_UNLIKELY(check || size > mca_btl_ugni_component.ugni_get_limit)) {
|
||||
/* switch to put */
|
||||
return OPAL_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
if (src_seg->extra_byte_count) {
|
||||
memmove ((char *) dst_seg->base.seg_addr.pval + size, src_seg->extra_bytes, src_seg->extra_byte_count);
|
||||
src_seg->base.seg_len = size;
|
||||
dst_seg->base.seg_len = size;
|
||||
}
|
||||
|
||||
des->des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
|
||||
|
||||
return mca_btl_ugni_post (frag, true, dst_seg, src_seg);
|
||||
return mca_btl_ugni_post (endpoint, true, size, local_address, remote_address, local_handle,
|
||||
remote_handle, cbfunc, cbcontext, cbdata);
|
||||
}
|
||||
|
||||
/* eager get */
|
||||
@ -60,6 +50,8 @@ static void mca_btl_ugni_callback_eager_get_progress_pending (struct mca_btl_bas
|
||||
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
|
||||
mca_btl_ugni_base_frag_t *pending_frag, *frag = (mca_btl_ugni_base_frag_t *) desc;
|
||||
|
||||
memset (&frag->hdr, 0, sizeof (frag->hdr));
|
||||
|
||||
OPAL_THREAD_LOCK(&ugni_module->eager_get_pending_lock);
|
||||
pending_frag = (mca_btl_ugni_base_frag_t *) opal_list_remove_first (&ugni_module->eager_get_pending);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->eager_get_pending_lock);
|
||||
@ -68,6 +60,8 @@ static void mca_btl_ugni_callback_eager_get_progress_pending (struct mca_btl_bas
|
||||
/* copy the relevant data out of the pending fragment */
|
||||
frag->endpoint = pending_frag->endpoint;
|
||||
|
||||
assert (frag != pending_frag);
|
||||
|
||||
/* start the next eager get using this fragment */
|
||||
(void) mca_btl_ugni_start_eager_get (frag->endpoint, pending_frag->hdr.eager_ex, frag);
|
||||
|
||||
@ -80,19 +74,21 @@ static void mca_btl_ugni_callback_eager_get_progress_pending (struct mca_btl_bas
|
||||
}
|
||||
|
||||
static void mca_btl_ugni_callback_eager_get (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct mca_btl_base_descriptor_t *desc, int rc)
|
||||
void *local_address, mca_btl_base_registration_handle_t *local_handle,
|
||||
void *context, void *cbdata, int status)
|
||||
{
|
||||
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
|
||||
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) desc;
|
||||
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) context;
|
||||
uint32_t len = frag->hdr.eager.send.lag & 0x00ffffff;
|
||||
uint8_t tag = frag->hdr.eager.send.lag >> 24;
|
||||
size_t payload_len = frag->hdr.eager.src_seg.base.seg_len;
|
||||
size_t payload_len = frag->hdr.eager.size;
|
||||
size_t hdr_len = len - payload_len;
|
||||
mca_btl_active_message_callback_t *reg;
|
||||
mca_btl_base_segment_t segs[2];
|
||||
mca_btl_ugni_base_frag_t tmp;
|
||||
int rc;
|
||||
|
||||
BTL_VERBOSE(("eager get for rem_ctx %p complete", frag->hdr.eager.ctx));
|
||||
BTL_VERBOSE(("eager get for rem_ctx %p complete", frag->hdr.eager.ctx))
|
||||
|
||||
tmp.base.des_local = segs;
|
||||
if (hdr_len) {
|
||||
@ -100,19 +96,21 @@ static void mca_btl_ugni_callback_eager_get (struct mca_btl_base_module_t *btl,
|
||||
|
||||
segs[0].seg_addr.pval = frag->hdr.eager_ex.pml_header;
|
||||
segs[0].seg_len = hdr_len;
|
||||
segs[1].seg_addr.pval = frag->segments[0].base.seg_addr.pval;
|
||||
segs[1].seg_addr.pval = local_address;
|
||||
segs[1].seg_len = payload_len;
|
||||
} else {
|
||||
tmp.base.des_local_count = 1;
|
||||
|
||||
segs[0].seg_addr.pval = frag->segments[0].base.seg_addr.pval;
|
||||
segs[0].seg_addr.pval = local_address;
|
||||
segs[0].seg_len = payload_len;
|
||||
}
|
||||
|
||||
reg = mca_btl_base_active_message_trigger + tag;
|
||||
reg->cbfunc(&frag->endpoint->btl->super, tag, &(tmp.base), reg->cbdata);
|
||||
|
||||
/* fill in the response header */
|
||||
frag->hdr.rdma.ctx = frag->hdr.eager.ctx;
|
||||
frag->flags = MCA_BTL_UGNI_FRAG_RESPONSE;
|
||||
|
||||
/* once complete use this fragment for a pending eager get if any exist */
|
||||
frag->base.des_cbfunc = mca_btl_ugni_callback_eager_get_progress_pending;
|
||||
@ -122,6 +120,7 @@ static void mca_btl_ugni_callback_eager_get (struct mca_btl_base_module_t *btl,
|
||||
NULL, 0, MCA_BTL_UGNI_TAG_RDMA_COMPLETE);
|
||||
if (OPAL_UNLIKELY(0 > rc)) {
|
||||
/* queue fragment */
|
||||
OPAL_THREAD_LOCK(&endpoint->lock);
|
||||
if (false == endpoint->wait_listed) {
|
||||
OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
|
||||
opal_list_append (&ugni_module->ep_wait_list, &endpoint->super);
|
||||
@ -129,50 +128,50 @@ static void mca_btl_ugni_callback_eager_get (struct mca_btl_base_module_t *btl,
|
||||
endpoint->wait_listed = true;
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&endpoint->lock);
|
||||
opal_list_append (&endpoint->frag_wait_list, (opal_list_item_t *) frag);
|
||||
OPAL_THREAD_UNLOCK(&endpoint->lock);
|
||||
}
|
||||
}
|
||||
|
||||
int mca_btl_ugni_start_eager_get (mca_btl_base_endpoint_t *ep,
|
||||
int mca_btl_ugni_start_eager_get (mca_btl_base_endpoint_t *endpoint,
|
||||
mca_btl_ugni_eager_ex_frag_hdr_t hdr,
|
||||
mca_btl_ugni_base_frag_t *frag)
|
||||
{
|
||||
mca_btl_ugni_module_t *ugni_module = ep->btl;
|
||||
mca_btl_ugni_module_t *ugni_module = endpoint->btl;
|
||||
size_t size;
|
||||
int rc;
|
||||
|
||||
BTL_VERBOSE(("starting eager get for remote ctx: %p", hdr.eager.ctx));
|
||||
|
||||
do {
|
||||
if (NULL == frag) {
|
||||
rc = MCA_BTL_UGNI_FRAG_ALLOC_EAGER_RECV(ep, frag);
|
||||
/* try to allocate a registered buffer */
|
||||
rc = MCA_BTL_UGNI_FRAG_ALLOC_EAGER_RECV(endpoint, frag);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
(void) MCA_BTL_UGNI_FRAG_ALLOC_RDMA_INT(ep, frag);
|
||||
/* no registered buffers available. try again later */
|
||||
(void) MCA_BTL_UGNI_FRAG_ALLOC_RDMA_INT(endpoint, frag);
|
||||
|
||||
/* not much can be done if a small fragment can not be allocated. abort! */
|
||||
assert (NULL != frag);
|
||||
frag->hdr.eager_ex = hdr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
frag->hdr.eager_ex = hdr;
|
||||
frag->flags = 0;
|
||||
|
||||
frag->base.des_flags = 0;
|
||||
frag->hdr.eager_ex = hdr;
|
||||
|
||||
frag->segments[1] = hdr.eager.src_seg;
|
||||
|
||||
/* increase size to a multiple of 4 bytes (required for get) */
|
||||
frag->segments[0].base.seg_len = frag->segments[1].base.seg_len =
|
||||
(hdr.eager.src_seg.base.seg_len + 3) & ~3;
|
||||
|
||||
frag->base.des_local = &frag->segments[1].base;
|
||||
/* increase size to a multiple of 4 bytes (required for get on Gemini) */
|
||||
size = (hdr.eager.size + 3) & ~3;
|
||||
|
||||
/* set up callback for get completion */
|
||||
frag->base.des_flags = MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
|
||||
frag->base.des_cbfunc = mca_btl_ugni_callback_eager_get;
|
||||
|
||||
rc = mca_btl_ugni_post (frag, GNI_POST_RDMA_GET, frag->segments, frag->segments + 1);
|
||||
/* start the get */
|
||||
rc = mca_btl_ugni_post (endpoint, true, size, frag->base.super.ptr, hdr.eager.address,
|
||||
&frag->memory_handle, &hdr.eager.memory_handle,
|
||||
mca_btl_ugni_callback_eager_get, frag, NULL);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS == rc)) {
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
@ -27,13 +27,6 @@ mca_btl_ugni_free (struct mca_btl_base_module_t *btl,
|
||||
static int
|
||||
mca_btl_ugni_module_finalize (struct mca_btl_base_module_t* btl);
|
||||
|
||||
static mca_btl_base_descriptor_t *
|
||||
mca_btl_ugni_prepare_dst (mca_btl_base_module_t *btl,
|
||||
mca_btl_base_endpoint_t *endpoint,
|
||||
mca_mpool_base_registration_t *registration,
|
||||
opal_convertor_t *convertor, uint8_t order,
|
||||
size_t reserve, size_t *size, uint32_t flags);
|
||||
|
||||
static struct mca_btl_base_descriptor_t *
|
||||
mca_btl_ugni_prepare_src (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
@ -42,20 +35,27 @@ mca_btl_ugni_prepare_src (struct mca_btl_base_module_t *btl,
|
||||
uint8_t order, size_t reserve, size_t *size,
|
||||
uint32_t flags);
|
||||
|
||||
static mca_btl_base_registration_handle_t *
|
||||
mca_btl_ugni_register_mem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *base,
|
||||
size_t size, uint32_t flags);
|
||||
|
||||
static int mca_btl_ugni_deregister_mem (mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle);
|
||||
|
||||
mca_btl_ugni_module_t mca_btl_ugni_module = {
|
||||
.super = {
|
||||
.btl_component = &mca_btl_ugni_component.super,
|
||||
.btl_add_procs = mca_btl_ugni_add_procs,
|
||||
.btl_del_procs = mca_btl_ugni_del_procs,
|
||||
.btl_finalize = mca_btl_ugni_module_finalize,
|
||||
.btl_alloc = mca_btl_ugni_alloc,
|
||||
.btl_free = mca_btl_ugni_free,
|
||||
.btl_prepare_src = mca_btl_ugni_prepare_src,
|
||||
.btl_prepare_dst = mca_btl_ugni_prepare_dst,
|
||||
.btl_send = mca_btl_ugni_send,
|
||||
.btl_sendi = mca_btl_ugni_sendi,
|
||||
.btl_put = mca_btl_ugni_put,
|
||||
.btl_get = mca_btl_ugni_get,
|
||||
.btl_component = &mca_btl_ugni_component.super,
|
||||
.btl_add_procs = mca_btl_ugni_add_procs,
|
||||
.btl_del_procs = mca_btl_ugni_del_procs,
|
||||
.btl_finalize = mca_btl_ugni_module_finalize,
|
||||
.btl_alloc = mca_btl_ugni_alloc,
|
||||
.btl_free = mca_btl_ugni_free,
|
||||
.btl_prepare_src = mca_btl_ugni_prepare_src,
|
||||
.btl_send = mca_btl_ugni_send,
|
||||
.btl_sendi = mca_btl_ugni_sendi,
|
||||
.btl_put = mca_btl_ugni_put,
|
||||
.btl_get = mca_btl_ugni_get,
|
||||
.btl_register_mem = mca_btl_ugni_register_mem,
|
||||
.btl_deregister_mem = mca_btl_ugni_deregister_mem,
|
||||
}
|
||||
};
|
||||
|
||||
@ -92,6 +92,9 @@ mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
|
||||
OBJ_CONSTRUCT(&ugni_module->endpoints, opal_pointer_array_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->id_to_endpoint, opal_hash_table_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->smsg_mboxes, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->pending_descriptors, opal_list_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->eager_get_pending, opal_list_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->post_descriptors, ompi_free_list_t);
|
||||
|
||||
ugni_module->device = dev;
|
||||
dev->btl_ctx = (void *) ugni_module;
|
||||
@ -188,7 +191,6 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
|
||||
OBJ_DESTRUCT(&ugni_module->pending_smsg_frags_bb);
|
||||
OBJ_DESTRUCT(&ugni_module->id_to_endpoint);
|
||||
OBJ_DESTRUCT(&ugni_module->endpoints);
|
||||
OBJ_DESTRUCT(&ugni_module->failed_frags);
|
||||
|
||||
OBJ_DESTRUCT(&ugni_module->eager_get_pending);
|
||||
OBJ_DESTRUCT(&ugni_module->eager_get_pending_lock);
|
||||
@ -234,13 +236,13 @@ mca_btl_ugni_alloc(struct mca_btl_base_module_t *btl,
|
||||
|
||||
frag->base.des_flags = flags;
|
||||
frag->base.order = order;
|
||||
frag->base.des_local = &frag->segments[1].base;
|
||||
frag->base.des_local = &frag->segments[1];
|
||||
frag->base.des_local_count = 1;
|
||||
|
||||
frag->segments[0].base.seg_addr.pval = NULL;
|
||||
frag->segments[0].base.seg_len = 0;
|
||||
frag->segments[1].base.seg_addr.pval = frag->base.super.ptr;
|
||||
frag->segments[1].base.seg_len = size;
|
||||
frag->segments[0].seg_addr.pval = NULL;
|
||||
frag->segments[0].seg_len = 0;
|
||||
frag->segments[1].seg_addr.pval = frag->base.super.ptr;
|
||||
frag->segments[1].seg_len = size;
|
||||
|
||||
frag->flags = MCA_BTL_UGNI_FRAG_BUFFERED;
|
||||
if (size > mca_btl_ugni_component.smsg_max_data) {
|
||||
@ -251,7 +253,7 @@ mca_btl_ugni_alloc(struct mca_btl_base_module_t *btl,
|
||||
|
||||
registration = (mca_btl_ugni_reg_t *) frag->base.super.registration;
|
||||
|
||||
frag->segments[1].memory_handle = registration->memory_hdl;
|
||||
frag->hdr.eager.memory_handle = registration->handle;
|
||||
} else {
|
||||
frag->hdr_size = sizeof (frag->hdr.send);
|
||||
}
|
||||
@ -274,54 +276,32 @@ mca_btl_ugni_prepare_src (struct mca_btl_base_module_t *btl,
|
||||
uint8_t order, size_t reserve, size_t *size,
|
||||
uint32_t flags)
|
||||
{
|
||||
if (OPAL_LIKELY(reserve)) {
|
||||
return mca_btl_ugni_prepare_src_send (btl, endpoint, convertor,
|
||||
order, reserve, size, flags);
|
||||
} else {
|
||||
return mca_btl_ugni_prepare_src_rdma (btl, endpoint, registration,
|
||||
convertor, order, size, flags);
|
||||
}
|
||||
return mca_btl_ugni_prepare_src_send (btl, endpoint, convertor,
|
||||
order, reserve, size, flags);
|
||||
}
|
||||
|
||||
static mca_btl_base_descriptor_t *
|
||||
mca_btl_ugni_prepare_dst (mca_btl_base_module_t *btl,
|
||||
mca_btl_base_endpoint_t *endpoint,
|
||||
mca_mpool_base_registration_t *registration,
|
||||
opal_convertor_t *convertor, uint8_t order,
|
||||
size_t reserve, size_t *size, uint32_t flags)
|
||||
static mca_btl_base_registration_handle_t *
|
||||
mca_btl_ugni_register_mem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *base,
|
||||
size_t size, uint32_t flags)
|
||||
{
|
||||
mca_btl_ugni_base_frag_t *frag;
|
||||
void *data_ptr;
|
||||
mca_btl_ugni_reg_t *reg;
|
||||
int rc;
|
||||
|
||||
opal_convertor_get_current_pointer (convertor, &data_ptr);
|
||||
|
||||
(void) MCA_BTL_UGNI_FRAG_ALLOC_RDMA(endpoint, frag);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
rc = btl->btl_mpool->mpool_register(btl->btl_mpool, base, size, 0,
|
||||
(mca_mpool_base_registration_t **) &reg);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* always need to register the buffer for put/get (even for fma) */
|
||||
if (NULL == registration) {
|
||||
rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
|
||||
data_ptr, *size, 0,
|
||||
&registration);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
mca_btl_ugni_frag_return (frag);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
frag->registration = (mca_btl_ugni_reg_t*) registration;
|
||||
}
|
||||
|
||||
frag->segments[0].memory_handle = ((mca_btl_ugni_reg_t *)registration)->memory_hdl;
|
||||
frag->segments[0].base.seg_len = *size;
|
||||
frag->segments[0].base.seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
|
||||
|
||||
frag->base.des_local = &frag->segments->base;
|
||||
frag->base.des_local_count = 1;
|
||||
frag->base.order = order;
|
||||
frag->base.des_flags = flags;
|
||||
|
||||
return (struct mca_btl_base_descriptor_t *) frag;
|
||||
return &reg->handle;
|
||||
}
|
||||
|
||||
/**
 * Release a registration handle.
 *
 * The handle is a member of a mca_btl_ugni_reg_t, so the enclosing
 * registration is recovered by subtracting the member offset from the
 * handle address and is then handed to the module's mpool for deregistration.
 *
 * @param btl     (IN) BTL module whose mpool is asked to deregister
 * @param handle  (IN) Handle embedded in the registration to release
 *
 * @return OPAL_SUCCESS always; the mpool's return code is deliberately ignored.
 */
static int mca_btl_ugni_deregister_mem (mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle)
{
    /* step back from the embedded handle to the start of its registration */
    mca_btl_ugni_reg_t *reg =
        (mca_btl_ugni_reg_t *)((intptr_t) handle - offsetof (mca_btl_ugni_reg_t, handle));

    (void) btl->btl_mpool->mpool_deregister (btl->btl_mpool, &reg->base);

    return OPAL_SUCCESS;
}
|
||||
|
@ -35,13 +35,13 @@ mca_btl_ugni_prepare_src_send_nodata (struct mca_btl_base_module_t *btl,
|
||||
|
||||
frag->hdr_size = reserve + sizeof (frag->hdr.send);
|
||||
|
||||
frag->segments[0].base.seg_addr.pval = frag->hdr.send_ex.pml_header;
|
||||
frag->segments[0].base.seg_len = reserve;
|
||||
frag->segments[0].seg_addr.pval = frag->hdr.send_ex.pml_header;
|
||||
frag->segments[0].seg_len = reserve;
|
||||
|
||||
frag->segments[1].base.seg_addr.pval = NULL;
|
||||
frag->segments[1].base.seg_len = 0;
|
||||
frag->segments[1].seg_addr.pval = NULL;
|
||||
frag->segments[1].seg_len = 0;
|
||||
|
||||
frag->base.des_local = &frag->segments->base;
|
||||
frag->base.des_local = &frag->segments;
|
||||
frag->base.des_local_count = 1;
|
||||
frag->base.order = order;
|
||||
frag->base.des_flags = flags;
|
||||
@ -84,21 +84,21 @@ mca_btl_ugni_prepare_src_send_inplace (struct mca_btl_base_module_t *btl,
|
||||
frag->flags = MCA_BTL_UGNI_FRAG_EAGER | MCA_BTL_UGNI_FRAG_IGNORE;
|
||||
|
||||
frag->registration = registration;
|
||||
frag->segments[1].memory_handle = registration->memory_hdl;
|
||||
frag->hdr.eager.memory_handle = registration->handle;;
|
||||
|
||||
frag->hdr_size = reserve + sizeof (frag->hdr.eager);
|
||||
frag->segments[0].base.seg_addr.pval = frag->hdr.eager_ex.pml_header;
|
||||
frag->segments[0].seg_addr.pval = frag->hdr.eager_ex.pml_header;
|
||||
} else {
|
||||
frag->hdr_size = reserve + sizeof (frag->hdr.send);
|
||||
frag->segments[0].base.seg_addr.pval = frag->hdr.send_ex.pml_header;
|
||||
frag->segments[0].seg_addr.pval = frag->hdr.send_ex.pml_header;
|
||||
}
|
||||
|
||||
frag->segments[0].base.seg_len = reserve;
|
||||
frag->segments[0].seg_len = reserve;
|
||||
|
||||
frag->segments[1].base.seg_addr.pval = data_ptr;
|
||||
frag->segments[1].base.seg_len = *size;
|
||||
frag->segments[1].seg_addr.pval = data_ptr;
|
||||
frag->segments[1].seg_len = *size;
|
||||
|
||||
frag->base.des_local = &frag->segments->base;
|
||||
frag->base.des_local = &frag->segments;
|
||||
frag->base.des_local_count = 2;
|
||||
frag->base.order = order;
|
||||
frag->base.des_flags = flags;
|
||||
@ -130,10 +130,9 @@ mca_btl_ugni_prepare_src_send_buffered (struct mca_btl_base_module_t *btl,
|
||||
|
||||
registration = (mca_btl_ugni_reg_t *) frag->base.super.registration;
|
||||
|
||||
frag->segments[1].memory_handle = registration->memory_hdl;
|
||||
|
||||
frag->hdr.eager.memory_handle = registration->handle;
|
||||
frag->hdr_size = reserve + sizeof (frag->hdr.eager);
|
||||
frag->segments[0].base.seg_addr.pval = frag->hdr.eager_ex.pml_header;
|
||||
frag->segments[0].seg_addr.pval = frag->hdr.eager_ex.pml_header;
|
||||
} else {
|
||||
(void) MCA_BTL_UGNI_FRAG_ALLOC_SMSG(endpoint, frag);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
@ -141,7 +140,7 @@ mca_btl_ugni_prepare_src_send_buffered (struct mca_btl_base_module_t *btl,
|
||||
}
|
||||
|
||||
frag->hdr_size = reserve + sizeof (frag->hdr.send);
|
||||
frag->segments[0].base.seg_addr.pval = frag->hdr.send_ex.pml_header;
|
||||
frag->segments[0].seg_addr.pval = frag->hdr.send_ex.pml_header;
|
||||
}
|
||||
|
||||
frag->flags |= MCA_BTL_UGNI_FRAG_BUFFERED;
|
||||
@ -155,12 +154,12 @@ mca_btl_ugni_prepare_src_send_buffered (struct mca_btl_base_module_t *btl,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
frag->segments[0].base.seg_len = reserve;
|
||||
frag->segments[0].seg_len = reserve;
|
||||
|
||||
frag->segments[1].base.seg_addr.pval = frag->base.super.ptr;
|
||||
frag->segments[1].base.seg_len = *size;
|
||||
frag->segments[1].seg_addr.pval = frag->base.super.ptr;
|
||||
frag->segments[1].seg_len = *size;
|
||||
|
||||
frag->base.des_local = &frag->segments->base;
|
||||
frag->base.des_local = &frag->segments;
|
||||
frag->base.des_local_count = 2;
|
||||
frag->base.order = order;
|
||||
frag->base.des_flags = flags;
|
||||
@ -197,66 +196,4 @@ mca_btl_ugni_prepare_src_send (struct mca_btl_base_module_t *btl,
|
||||
}
|
||||
}
|
||||
|
||||
static inline struct mca_btl_base_descriptor_t *
|
||||
mca_btl_ugni_prepare_src_rdma (struct mca_btl_base_module_t *btl,
|
||||
mca_btl_base_endpoint_t *endpoint,
|
||||
mca_mpool_base_registration_t *registration,
|
||||
struct opal_convertor_t *convertor,
|
||||
uint8_t order, size_t *size,
|
||||
uint32_t flags)
|
||||
{
|
||||
mca_btl_ugni_base_frag_t *frag;
|
||||
void *data_ptr;
|
||||
int rc;
|
||||
|
||||
opal_convertor_get_current_pointer (convertor, &data_ptr);
|
||||
|
||||
(void) MCA_BTL_UGNI_FRAG_ALLOC_RDMA(endpoint, frag);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* For medium message use FMA protocols and for large message
|
||||
* use BTE protocols
|
||||
*/
|
||||
/* No need to register while using FMA Put (registration is
|
||||
* non-null in get-- is this always true?) */
|
||||
if (*size >= mca_btl_ugni_component.ugni_fma_limit || (flags & MCA_BTL_DES_FLAGS_GET)) {
|
||||
if (NULL == registration) {
|
||||
rc = btl->btl_mpool->mpool_register(btl->btl_mpool, data_ptr, *size, 0,
|
||||
(mca_mpool_base_registration_t **) ®istration);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
mca_btl_ugni_frag_return (frag);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
frag->registration = (mca_btl_ugni_reg_t *) registration;
|
||||
}
|
||||
|
||||
frag->segments[0].memory_handle = ((mca_btl_ugni_reg_t *)registration)->memory_hdl;
|
||||
} else {
|
||||
memset ((void *) &frag->segments[0].memory_handle, 0,
|
||||
sizeof (frag->segments[0].memory_handle));
|
||||
}
|
||||
|
||||
if ((flags & MCA_BTL_DES_FLAGS_GET) && (*size & 0x3)) {
|
||||
memmove (frag->segments[0].extra_bytes, (char *) data_ptr + (*size & ~0x3),
|
||||
*size & 0x3);
|
||||
frag->segments[0].extra_byte_count = *size & 0x3;
|
||||
} else {
|
||||
frag->segments[0].extra_byte_count = 0;
|
||||
}
|
||||
|
||||
frag->segments[0].base.seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
|
||||
frag->segments[0].base.seg_len = *size;
|
||||
|
||||
frag->base.des_local = &frag->segments->base;
|
||||
frag->base.des_local_count = 1;
|
||||
frag->base.order = order;
|
||||
frag->base.des_flags = flags;
|
||||
|
||||
return &frag->base;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -14,25 +14,20 @@
|
||||
|
||||
#include "btl_ugni_rdma.h"
|
||||
|
||||
/**
|
||||
* Initiate a put operation.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param descriptor (IN) Description of the data to be transferred
|
||||
*/
|
||||
int mca_btl_ugni_put (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct mca_btl_base_descriptor_t *des) {
|
||||
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) des;
|
||||
|
||||
BTL_VERBOSE(("Using RDMA/FMA Put for frag %p", (void *) des));
|
||||
void *local_address, uint64_t remote_address,
|
||||
struct mca_btl_base_registration_handle_t *local_handle,
|
||||
struct mca_btl_base_registration_handle_t *remote_handle,
|
||||
size_t size, int flags, mca_btl_base_rdma_completion_fn_t cbfunc,
|
||||
void *cbcontext, void *cbdata)
|
||||
{
|
||||
BTL_VERBOSE(("Using RDMA/FMA Put from local address %p to remote address %" PRIx64,
|
||||
local_address, remote_address));
|
||||
|
||||
/* cause endpoint to bind if it isn't already (bind is sufficient for rdma) */
|
||||
(void) mca_btl_ugni_check_endpoint_state(endpoint);
|
||||
|
||||
des->des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
|
||||
|
||||
return mca_btl_ugni_post (frag, false, (mca_btl_ugni_segment_t *) des->des_local,
|
||||
(mca_btl_ugni_segment_t *) des->des_remote);
|
||||
return mca_btl_ugni_post (endpoint, false, size, local_address, remote_address, local_handle,
|
||||
remote_handle, cbfunc, cbcontext, cbdata);
|
||||
}
|
||||
|
@ -20,103 +20,144 @@ int mca_btl_ugni_start_eager_get (mca_btl_base_endpoint_t *ep,
|
||||
mca_btl_ugni_eager_ex_frag_hdr_t hdr,
|
||||
mca_btl_ugni_base_frag_t *frag);
|
||||
|
||||
static inline void init_gni_post_desc (mca_btl_ugni_base_frag_t *frag,
|
||||
gni_post_type_t op_type,
|
||||
uint64_t lcl_addr,
|
||||
gni_mem_handle_t lcl_mdh,
|
||||
uint64_t rem_addr,
|
||||
gni_mem_handle_t rem_mdh,
|
||||
uint64_t bufsize,
|
||||
gni_cq_handle_t cq_hndl) {
|
||||
frag->post_desc.base.type = op_type;
|
||||
frag->post_desc.base.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
|
||||
frag->post_desc.base.dlvr_mode = GNI_DLVMODE_PERFORMANCE;
|
||||
frag->post_desc.base.local_addr = (uint64_t) lcl_addr;
|
||||
frag->post_desc.base.local_mem_hndl = lcl_mdh;
|
||||
frag->post_desc.base.remote_addr = (uint64_t) rem_addr;
|
||||
frag->post_desc.base.remote_mem_hndl = rem_mdh;
|
||||
frag->post_desc.base.length = bufsize;
|
||||
#if 0
|
||||
frag->post_desc.base.rdma_mode = GNI_RDMAMODE_FENCE;
|
||||
#endif
|
||||
frag->post_desc.base.rdma_mode = 0;
|
||||
frag->post_desc.base.src_cq_hndl = cq_hndl;
|
||||
frag->post_desc.tries = 0;
|
||||
static inline void init_gni_post_desc (opal_common_ugni_post_desc_t *post_desc,
|
||||
gni_post_type_t op_type,
|
||||
uint64_t lcl_addr,
|
||||
gni_mem_handle_t lcl_mdh,
|
||||
uint64_t rem_addr,
|
||||
gni_mem_handle_t rem_mdh,
|
||||
uint64_t bufsize,
|
||||
gni_cq_handle_t cq_hndl) {
|
||||
post_desc->base.type = op_type;
|
||||
post_desc->base.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
|
||||
post_desc->base.dlvr_mode = GNI_DLVMODE_PERFORMANCE;
|
||||
post_desc->base.local_addr = (uint64_t) lcl_addr;
|
||||
post_desc->base.local_mem_hndl = lcl_mdh;
|
||||
post_desc->base.remote_addr = (uint64_t) rem_addr;
|
||||
post_desc->base.remote_mem_hndl = rem_mdh;
|
||||
post_desc->base.length = bufsize;
|
||||
post_desc->base.rdma_mode = 0;
|
||||
post_desc->base.src_cq_hndl = cq_hndl;
|
||||
post_desc->tries = 0;
|
||||
}
|
||||
|
||||
static inline int mca_btl_ugni_post_fma (mca_btl_ugni_base_frag_t *frag, gni_post_type_t op_type,
|
||||
mca_btl_ugni_segment_t *lcl_seg, mca_btl_ugni_segment_t *rem_seg)
|
||||
static inline int mca_btl_ugni_post_fma (struct mca_btl_base_endpoint_t *endpoint, gni_post_type_t op_type,
|
||||
size_t size, void *local_address, uint64_t remote_address,
|
||||
mca_btl_base_registration_handle_t *local_handle,
|
||||
mca_btl_base_registration_handle_t *remote_handle,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc,
|
||||
void *cbcontext, void *cbdata)
|
||||
{
|
||||
gni_return_t rc;
|
||||
mca_btl_ugni_post_descriptor_t *post_desc;
|
||||
gni_return_t grc;
|
||||
|
||||
/* Post descriptor (CQ is ignored for FMA transactions) */
|
||||
init_gni_post_desc (frag, op_type, lcl_seg->base.seg_addr.lval, lcl_seg->memory_handle,
|
||||
rem_seg->base.seg_addr.lval, rem_seg->memory_handle, lcl_seg->base.seg_len, 0);
|
||||
mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata, &post_desc);
|
||||
if (OPAL_UNLIKELY(NULL == post_desc)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&frag->endpoint->common->dev->dev_lock);
|
||||
rc = GNI_PostFma (frag->endpoint->rdma_ep_handle, &frag->post_desc.base);
|
||||
OPAL_THREAD_UNLOCK(&frag->endpoint->common->dev->dev_lock);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_VERBOSE(("GNI_PostFma failed with gni rc: %d", rc));
|
||||
/* Post descriptor (CQ is ignored for FMA transactions) -- The CQ associated with the endpoint
|
||||
* is used. */
|
||||
init_gni_post_desc (&post_desc->desc, op_type, (intptr_t) local_address, local_handle->gni_handle,
|
||||
remote_address, remote_handle->gni_handle, size, 0);
|
||||
|
||||
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
|
||||
grc = GNI_PostFma (endpoint->rdma_ep_handle, &post_desc->desc.base);
|
||||
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
|
||||
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
|
||||
|
||||
if (GNI_RC_ALIGNMENT_ERROR == grc) {
|
||||
BTL_VERBOSE(("GNI_PostFma failed with an alignment error"));
|
||||
return OPAL_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("GNI_PostFma failed with gni rc: %d", grc));
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static inline int mca_btl_ugni_post_bte (mca_btl_ugni_base_frag_t *frag, gni_post_type_t op_type,
|
||||
mca_btl_ugni_segment_t *lcl_seg, mca_btl_ugni_segment_t *rem_seg)
|
||||
static inline int mca_btl_ugni_post_bte (mca_btl_base_endpoint_t *endpoint, gni_post_type_t op_type,
|
||||
size_t size, void *local_address, uint64_t remote_address,
|
||||
mca_btl_base_registration_handle_t *local_handle,
|
||||
mca_btl_base_registration_handle_t *remote_handle,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc,
|
||||
void *cbcontext, void *cbdata)
|
||||
{
|
||||
gni_return_t status;
|
||||
mca_btl_ugni_post_descriptor_t *post_desc;
|
||||
gni_return_t grc;
|
||||
|
||||
mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata, &post_desc);
|
||||
if (OPAL_UNLIKELY(NULL == post_desc)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* Post descriptor */
|
||||
init_gni_post_desc (frag, op_type, lcl_seg->base.seg_addr.lval, lcl_seg->memory_handle,
|
||||
rem_seg->base.seg_addr.lval, rem_seg->memory_handle, lcl_seg->base.seg_len,
|
||||
frag->endpoint->btl->rdma_local_cq);
|
||||
init_gni_post_desc (&post_desc->desc, op_type, (intptr_t) local_address, local_handle->gni_handle,
|
||||
remote_address, remote_handle->gni_handle, size, endpoint->btl->rdma_local_cq);
|
||||
|
||||
OPAL_THREAD_LOCK(&frag->endpoint->common->dev->dev_lock);
|
||||
status = GNI_PostRdma (frag->endpoint->rdma_ep_handle, &frag->post_desc.base);
|
||||
OPAL_THREAD_UNLOCK(&frag->endpoint->common->dev->dev_lock);
|
||||
if (GNI_RC_SUCCESS != status) {
|
||||
BTL_VERBOSE(("GNI_PostRdma failed with gni rc: %d", status));
|
||||
return opal_common_rc_ugni_to_opal(status);
|
||||
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
|
||||
grc = GNI_PostRdma (endpoint->rdma_ep_handle, &post_desc->desc.base);
|
||||
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
|
||||
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
|
||||
|
||||
if (GNI_RC_ALIGNMENT_ERROR == grc) {
|
||||
BTL_VERBOSE(("GNI_PostRdma failed with an alignment error"));
|
||||
return OPAL_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("GNI_PostRdma failed with gni rc: %d", grc));
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static inline int mca_btl_ugni_post (mca_btl_ugni_base_frag_t *frag, bool get, mca_btl_ugni_segment_t *lcl_seg,
|
||||
mca_btl_ugni_segment_t *rem_seg) {
|
||||
static inline int mca_btl_ugni_post (mca_btl_base_endpoint_t *endpoint, int get, size_t size,
|
||||
void *local_address, uint64_t remote_address,
|
||||
mca_btl_base_registration_handle_t *local_handle,
|
||||
mca_btl_base_registration_handle_t *remote_handle,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc,
|
||||
void *cbcontext, void *cbdata)
|
||||
{
|
||||
const gni_post_type_t fma_ops[2] = {GNI_POST_FMA_PUT, GNI_POST_FMA_GET};
|
||||
const gni_post_type_t rdma_ops[2] = {GNI_POST_RDMA_PUT, GNI_POST_RDMA_GET};
|
||||
|
||||
if (frag->base.des_local->seg_len <= mca_btl_ugni_component.ugni_fma_limit) {
|
||||
return mca_btl_ugni_post_fma (frag, fma_ops[get], lcl_seg, rem_seg);
|
||||
if (size <= mca_btl_ugni_component.ugni_fma_limit) {
|
||||
return mca_btl_ugni_post_fma (endpoint, fma_ops[get], size, local_address, remote_address,
|
||||
local_handle, remote_handle, cbfunc, cbcontext, cbdata);
|
||||
}
|
||||
|
||||
return mca_btl_ugni_post_bte (frag, rdma_ops[get], lcl_seg, rem_seg);
|
||||
return mca_btl_ugni_post_bte (endpoint, rdma_ops[get], size, local_address, remote_address,
|
||||
local_handle, remote_handle, cbfunc, cbcontext, cbdata);
|
||||
}
|
||||
|
||||
static inline void mca_btl_ugni_repost (mca_btl_ugni_base_frag_t *frag) {
|
||||
static inline int mca_btl_ugni_repost (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_post_descriptor_t *post_desc)
|
||||
{
|
||||
gni_return_t grc;
|
||||
|
||||
OPAL_THREAD_LOCK(&frag->endpoint->common->dev->dev_lock);
|
||||
if (GNI_POST_RDMA_PUT == frag->post_desc.base.type ||
|
||||
GNI_POST_RDMA_GET == frag->post_desc.base.type) {
|
||||
grc = GNI_PostRdma (frag->endpoint->rdma_ep_handle, &frag->post_desc.base);
|
||||
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
||||
if (GNI_POST_RDMA_PUT == post_desc->desc.base.type ||
|
||||
GNI_POST_RDMA_GET == post_desc->desc.base.type) {
|
||||
grc = GNI_PostRdma (post_desc->endpoint->rdma_ep_handle, &post_desc->desc.base);
|
||||
} else {
|
||||
grc = GNI_PostFma (frag->endpoint->rdma_ep_handle, &frag->post_desc.base);
|
||||
grc = GNI_PostFma (post_desc->endpoint->rdma_ep_handle, &post_desc->desc.base);
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&frag->endpoint->common->dev->dev_lock);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
|
||||
/* NTH: Should we even retry these? When this code was written there was no indication
|
||||
* whether an error in post is recoverable. Clobber this code and the associated data
|
||||
* structures if post errors are not recoverable. */
|
||||
OPAL_THREAD_LOCK(&frag->endpoint->btl->failed_frags_lock);
|
||||
opal_list_append (&frag->endpoint->btl->failed_frags, (opal_list_item_t *) frag);
|
||||
OPAL_THREAD_UNLOCK(&frag->endpoint->btl->failed_frags_lock);
|
||||
OPAL_THREAD_LOCK(&ugni_module->pending_descriptors_lock);
|
||||
opal_list_append (&ugni_module->pending_descriptors, (opal_list_item_t *) post_desc);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->pending_descriptors_lock);
|
||||
}
|
||||
|
||||
return opal_common_rc_ugni_to_opal (grc);
|
||||
}
|
||||
|
||||
#endif /* MCA_BTL_UGNI_RDMA_H */
|
||||
|
@ -23,7 +23,7 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
|
||||
mca_btl_base_tag_t tag)
|
||||
{
|
||||
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) descriptor;
|
||||
size_t size = frag->segments[0].base.seg_len + frag->segments[1].base.seg_len;
|
||||
size_t size = frag->segments[0].seg_len + frag->segments[1].seg_len;
|
||||
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
|
||||
int flags_save = frag->base.des_flags;
|
||||
int rc;
|
||||
@ -41,7 +41,7 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("btl/ugni sending descriptor %p from %d -> %d. length = %" PRIu64, (void *)descriptor,
|
||||
OPAL_PROC_MY_NAME.vpid, endpoint->common->ep_rem_id, frag->segments[0].base.seg_len));
|
||||
opal_process_name_vpid(OPAL_PROC_MY_NAME), endpoint->common->ep_rem_id, size));
|
||||
|
||||
/* temporarily disable ownership and callback flags so we can reliably check the complete flag */
|
||||
frag->base.des_flags &= ~(MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
|
||||
@ -90,15 +90,15 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
|
||||
return rc;
|
||||
}
|
||||
|
||||
int
|
||||
mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct opal_convertor_t *convertor,
|
||||
void *header, size_t header_size,
|
||||
size_t payload_size, uint8_t order,
|
||||
uint32_t flags, mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t **descriptor)
|
||||
int mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct opal_convertor_t *convertor,
|
||||
void *header, size_t header_size,
|
||||
size_t payload_size, uint8_t order,
|
||||
uint32_t flags, mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t **descriptor)
|
||||
{
|
||||
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
|
||||
size_t total_size = header_size + payload_size;
|
||||
mca_btl_ugni_base_frag_t *frag = NULL;
|
||||
size_t packed_size = payload_size;
|
||||
@ -118,13 +118,14 @@ mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
|
||||
frag = (mca_btl_ugni_base_frag_t *) mca_btl_ugni_prepare_src_send_buffered (btl, endpoint, convertor, order,
|
||||
header_size, &packed_size, flags);
|
||||
}
|
||||
|
||||
assert (packed_size == payload_size);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
break;
|
||||
}
|
||||
|
||||
frag->hdr.send.lag = (tag << 24) | total_size;
|
||||
memcpy (frag->segments[0].base.seg_addr.pval, header, header_size);
|
||||
memcpy (frag->segments[0].seg_addr.pval, header, header_size);
|
||||
|
||||
rc = mca_btl_ugni_send_frag (endpoint, frag);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
@ -151,7 +152,13 @@ int mca_btl_ugni_progress_send_wait_list (mca_btl_base_endpoint_t *endpoint)
|
||||
if (NULL == frag) {
|
||||
break;
|
||||
}
|
||||
rc = mca_btl_ugni_send_frag (endpoint, frag);
|
||||
if (OPAL_LIKELY(!(frag->flags & MCA_BTL_UGNI_FRAG_RESPONSE))) {
|
||||
rc = mca_btl_ugni_send_frag (endpoint, frag);
|
||||
} else {
|
||||
rc = opal_mca_btl_ugni_smsg_send (frag, &frag->hdr.rdma, sizeof (frag->hdr.rdma),
|
||||
NULL, 0, MCA_BTL_UGNI_TAG_RDMA_COMPLETE);
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS > rc)) {
|
||||
if (OPAL_LIKELY(OPAL_ERR_OUT_OF_RESOURCE == rc)) {
|
||||
OPAL_THREAD_LOCK(&endpoint->lock);
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -26,7 +26,7 @@ static void mca_btl_ugni_smsg_mbox_construct (mca_btl_ugni_smsg_mbox_t *mbox) {
|
||||
mbox->attr.smsg_attr.mbox_offset = (uintptr_t) mbox->super.ptr - (uintptr_t) base_reg->base;
|
||||
mbox->attr.smsg_attr.msg_buffer = base_reg->base;
|
||||
mbox->attr.smsg_attr.buff_size = mca_btl_ugni_component.smsg_mbox_size;
|
||||
mbox->attr.smsg_attr.mem_hndl = ugni_reg->memory_hdl;
|
||||
mbox->attr.smsg_attr.mem_hndl = ugni_reg->handle.gni_handle;
|
||||
#if 0
|
||||
fprintf(stderr,"ugni_reg->memory_hdl 0x%lx 0x%lx\n",
|
||||
ugni_reg->memory_hdl.qword1,ugni_reg->memory_hdl.qword2);
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -118,12 +118,13 @@ static inline int mca_btl_ugni_send_frag (struct mca_btl_base_endpoint_t *btl_pe
|
||||
mca_btl_ugni_base_frag_t *frag) {
|
||||
if (OPAL_LIKELY(!(frag->flags & MCA_BTL_UGNI_FRAG_EAGER))) {
|
||||
return opal_mca_btl_ugni_smsg_send (frag, &frag->hdr.send, frag->hdr_size,
|
||||
frag->segments[1].base.seg_addr.pval,
|
||||
frag->segments[1].base.seg_len,
|
||||
frag->segments[1].seg_addr.pval,
|
||||
frag->segments[1].seg_len,
|
||||
MCA_BTL_UGNI_TAG_SEND);
|
||||
}
|
||||
|
||||
frag->hdr.eager.src_seg = frag->segments[1];
|
||||
frag->hdr.eager.size = frag->segments[1].seg_len;
|
||||
frag->hdr.eager.address = frag->segments[1].seg_addr.lval;
|
||||
frag->hdr.eager.ctx = (void *) frag;
|
||||
|
||||
return opal_mca_btl_ugni_smsg_send (frag, &frag->hdr.eager, frag->hdr_size,
|
||||
|
@ -33,31 +33,30 @@
|
||||
* @param descriptor (IN) Description of the data to be transferred
|
||||
*/
|
||||
#if OPAL_BTL_VADER_HAVE_XPMEM
|
||||
int mca_btl_vader_get_xpmem (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct mca_btl_base_descriptor_t *des)
|
||||
int mca_btl_vader_get (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
|
||||
void *local_address, uint64_t remote_address,
|
||||
struct mca_btl_base_registration_handle_t *local_handle,
|
||||
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) des;
|
||||
mca_btl_base_segment_t *src = des->des_remote;
|
||||
mca_btl_base_segment_t *dst = des->des_local;
|
||||
const size_t size = min(dst->seg_len, src->seg_len);
|
||||
mca_mpool_base_registration_t *reg;
|
||||
void *rem_ptr;
|
||||
|
||||
reg = vader_get_registation (endpoint, src->seg_addr.pval, src->seg_len, 0, &rem_ptr);
|
||||
/* silence warning about unused arguments */
|
||||
(void) local_handle;
|
||||
(void) remote_handle;
|
||||
|
||||
reg = vader_get_registation (endpoint, (void *)(intptr_t) remote_address, size, 0, &rem_ptr);
|
||||
if (OPAL_UNLIKELY(NULL == rem_ptr)) {
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
|
||||
vader_memmove (dst->seg_addr.pval, rem_ptr, size);
|
||||
vader_memmove (local_address, rem_ptr, size);
|
||||
|
||||
vader_return_registration (reg, endpoint);
|
||||
|
||||
/* always call the callback function */
|
||||
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
|
||||
|
||||
frag->endpoint = endpoint;
|
||||
mca_btl_vader_frag_complete (frag);
|
||||
cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
@ -68,12 +67,8 @@ int mca_btl_vader_get_cma (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct mca_btl_base_descriptor_t *des)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) des;
|
||||
mca_btl_base_segment_t *src = des->des_remote;
|
||||
mca_btl_base_segment_t *dst = des->des_local;
|
||||
const size_t size = min(dst->seg_len, src->seg_len);
|
||||
struct iovec src_iov = {.iov_base = src->seg_addr.pval, .iov_len = size};
|
||||
struct iovec dst_iov = {.iov_base = dst->seg_addr.pval, .iov_len = size};
|
||||
struct iovec src_iov = {.iov_base = (void *)(intptr_t) remote_address, .iov_len = size};
|
||||
struct iovec dst_iov = {.iov_base = local_address, .iov_len = size};
|
||||
ssize_t ret;
|
||||
|
||||
ret = process_vm_readv (endpoint->segment_data.other.seg_ds->seg_cpid, &dst_iov, 1, &src_iov, 1, 0);
|
||||
|
@ -56,16 +56,6 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (
|
||||
uint32_t flags
|
||||
);
|
||||
|
||||
static struct mca_btl_base_descriptor_t *vader_prepare_dst (
|
||||
struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct mca_mpool_base_registration_t *registration,
|
||||
struct opal_convertor_t *convertor,
|
||||
uint8_t order,
|
||||
size_t reserve,
|
||||
size_t *size,
|
||||
uint32_t flags);
|
||||
|
||||
static int vader_add_procs(struct mca_btl_base_module_t* btl,
|
||||
size_t nprocs, struct opal_proc_t **procs,
|
||||
struct mca_btl_base_endpoint_t** peers,
|
||||
@ -82,7 +72,6 @@ mca_btl_vader_t mca_btl_vader = {
|
||||
.btl_alloc = mca_btl_vader_alloc,
|
||||
.btl_free = vader_free,
|
||||
.btl_prepare_src = vader_prepare_src,
|
||||
.btl_prepare_dst = vader_prepare_dst,
|
||||
.btl_send = mca_btl_vader_send,
|
||||
.btl_sendi = mca_btl_vader_sendi,
|
||||
.btl_dump = mca_btl_base_dump,
|
||||
@ -440,60 +429,6 @@ static int vader_free (struct mca_btl_base_module_t *btl, mca_btl_base_descripto
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
struct mca_btl_base_descriptor_t *vader_prepare_dst(struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct mca_mpool_base_registration_t *registration,
|
||||
struct opal_convertor_t *convertor,
|
||||
uint8_t order, size_t reserve, size_t *size,
|
||||
uint32_t flags)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag;
|
||||
void *data_ptr;
|
||||
|
||||
if (MCA_BTL_VADER_NONE != mca_btl_vader_component.single_copy_mechanism) {
|
||||
(void) MCA_BTL_VADER_FRAG_ALLOC_RDMA(frag, endpoint);
|
||||
} else {
|
||||
(void) MCA_BTL_VADER_FRAG_ALLOC_USER(frag, endpoint);
|
||||
}
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
opal_convertor_get_current_pointer (convertor, &data_ptr);
|
||||
|
||||
frag->segments[0].base.seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
|
||||
frag->segments[0].base.seg_len = *size;
|
||||
|
||||
#if OPAL_BTL_VADER_HAVE_KNEM
|
||||
if (MCA_BTL_VADER_KNEM == mca_btl_vader_component.single_copy_mechanism) {
|
||||
struct knem_cmd_create_region knem_cr;
|
||||
struct knem_cmd_param_iovec knem_iov;
|
||||
|
||||
knem_iov.base = (uintptr_t) data_ptr;
|
||||
knem_iov.len = *size;
|
||||
|
||||
knem_cr.iovec_array = (uintptr_t) &knem_iov;
|
||||
knem_cr.iovec_nr = 1;
|
||||
knem_cr.protection = PROT_WRITE;
|
||||
/* Vader will explicitly destroy this cookie */
|
||||
knem_cr.flags = 0;
|
||||
if (OPAL_UNLIKELY(ioctl(mca_btl_vader.knem_fd, KNEM_CMD_CREATE_REGION, &knem_cr) < 0)) {
|
||||
MCA_BTL_VADER_FRAG_RETURN(frag);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
frag->segments[0].cookie = knem_cr.cookie;
|
||||
frag->segments[0].registered_base = (intptr_t) data_ptr;
|
||||
frag->cookie = knem_cr.cookie;
|
||||
}
|
||||
#endif /* OPAL_BTL_SM_HAVE_KNEM */
|
||||
|
||||
frag->base.order = order;
|
||||
frag->base.des_flags = flags;
|
||||
|
||||
return &frag->base;
|
||||
}
|
||||
|
||||
/**
|
||||
* Pack data
|
||||
*
|
||||
|
@ -35,31 +35,26 @@
|
||||
* @param descriptor (IN) Description of the data to be transferred
|
||||
*/
|
||||
#if OPAL_BTL_VADER_HAVE_XPMEM
|
||||
int mca_btl_vader_put_xpmem (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct mca_btl_base_descriptor_t *des)
|
||||
int mca_btl_vader_put (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
|
||||
void *local_address, uint64_t remote_address,
|
||||
struct mca_btl_base_registration_handle_t *local_handle,
|
||||
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) des;
|
||||
mca_btl_base_segment_t *src = des->des_local;
|
||||
mca_btl_base_segment_t *dst = des->des_remote;
|
||||
const size_t size = min(dst->seg_len, src->seg_len);
|
||||
mca_mpool_base_registration_t *reg;
|
||||
void *rem_ptr;
|
||||
|
||||
reg = vader_get_registation (endpoint, dst->seg_addr.pval, dst->seg_len, 0, &rem_ptr);
|
||||
reg = vader_get_registation (endpoint, (void *)(intptr_t) remote_address, size, 0, &rem_ptr);
|
||||
if (OPAL_UNLIKELY(NULL == reg)) {
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
|
||||
vader_memmove (rem_ptr, src->seg_addr.pval, size);
|
||||
vader_memmove (rem_ptr, local_address, size);
|
||||
|
||||
vader_return_registration (reg, endpoint);
|
||||
|
||||
/* always call the callback function */
|
||||
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
|
||||
|
||||
frag->endpoint = endpoint;
|
||||
mca_btl_vader_frag_complete (frag);
|
||||
cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
@ -70,12 +65,8 @@ int mca_btl_vader_put_cma (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct mca_btl_base_descriptor_t *des)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) des;
|
||||
mca_btl_base_segment_t *src = des->des_local;
|
||||
mca_btl_base_segment_t *dst = des->des_remote;
|
||||
const size_t size = min(dst->seg_len, src->seg_len);
|
||||
struct iovec src_iov = {.iov_base = src->seg_addr.pval, .iov_len = size};
|
||||
struct iovec dst_iov = {.iov_base = dst->seg_addr.pval, .iov_len = size};
|
||||
struct iovec src_iov = {.iov_base = local_address, .iov_len = size};
|
||||
struct iovec dst_iov = {.iov_base = (void *)(intptr_t) remote_address, .iov_len = size};
|
||||
ssize_t ret;
|
||||
|
||||
ret = process_vm_writev (endpoint->segment_data.other.seg_ds->seg_cpid, &src_iov, 1, &dst_iov, 1, 0);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user