First crack at adding atomic operation support
Этот коммит содержится в:
родитель
249e5e009f
Коммит
2a70238f4d
@ -46,13 +46,15 @@ int mca_btl_base_param_register(mca_base_component_t *version,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&module->btl_exclusivity);
|
||||
|
||||
asprintf(&msg, "BTL bit flags (general flags: SEND=%d, PUT=%d, GET=%d, SEND_INPLACE=%d, RDMA_MATCHED=%d, HETEROGENEOUS_RDMA=%d; flags only used by the \"dr\" PML (ignored by others): ACK=%d, CHECKSUM=%d, RDMA_COMPLETION=%d; flags only used by the \"bfo\" PML (ignored by others): FAILOVER_SUPPORT=%d)",
|
||||
asprintf(&msg, "BTL bit flags (general flags: SEND=%d, PUT=%d, GET=%d, SEND_INPLACE=%d, HETEROGENEOUS_RDMA=%d, "
|
||||
"ATOMIC_OPS=%d; flags only used by the \"dr\" PML (ignored by others): ACK=%d, CHECKSUM=%d, "
|
||||
"RDMA_COMPLETION=%d; flags only used by the \"bfo\" PML (ignored by others): FAILOVER_SUPPORT=%d)",
|
||||
MCA_BTL_FLAGS_SEND,
|
||||
MCA_BTL_FLAGS_PUT,
|
||||
MCA_BTL_FLAGS_GET,
|
||||
MCA_BTL_FLAGS_SEND_INPLACE,
|
||||
MCA_BTL_FLAGS_RDMA_MATCHED,
|
||||
MCA_BTL_FLAGS_HETEROGENEOUS_RDMA,
|
||||
MCA_BTL_FLAGS_ATOMIC_OPS,
|
||||
MCA_BTL_FLAGS_NEED_ACK,
|
||||
MCA_BTL_FLAGS_NEED_CSUM,
|
||||
MCA_BTL_FLAGS_RDMA_COMPLETION,
|
||||
@ -64,6 +66,14 @@ int mca_btl_base_param_register(mca_base_component_t *version,
|
||||
&module->btl_flags);
|
||||
free(msg);
|
||||
|
||||
asprintf (&msg, "BTL atomic bit flags (general flags: ADD=%d, AND=%d, OR=%d, XOR=%d",
|
||||
MCA_BTL_ATOMIC_SUPPORTS_ADD, MCA_BTL_ATOMIC_SUPPORTS_AND, MCA_BTL_ATOMIC_SUPPORTS_OR,
|
||||
MCA_BTL_ATOMIC_SUPPORTS_XOR);
|
||||
(void) mca_base_component_var_register(version, "atomic_flags", msg, MCA_BASE_VAR_TYPE_UNSIGNED_INT,
|
||||
NULL, 0, MCA_BASE_VAR_FLAG_DEFAULT_ONLY, OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_CONSTANT, &module->btl_atomic_flags);
|
||||
free(msg);
|
||||
|
||||
(void) mca_base_component_var_register(version, "rndv_eager_limit", "Size (in bytes, including header) of \"phase 1\" fragment sent for all large messages (must be >= 0 and <= eager_limit)",
|
||||
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_4,
|
||||
@ -178,6 +188,10 @@ int mca_btl_base_param_verify(mca_btl_base_module_t *module)
|
||||
module->btl_flags &= ~MCA_BTL_FLAGS_GET;
|
||||
}
|
||||
|
||||
if (0 == module->btl_atomic_flags) {
|
||||
module->btl_flags &= ~MCA_BTL_FLAGS_ATOMIC_OPS;
|
||||
}
|
||||
|
||||
if (0 == module->btl_get_limit) {
|
||||
module->btl_get_limit = SIZE_MAX;
|
||||
}
|
||||
|
@ -226,6 +226,10 @@ typedef uint8_t mca_btl_base_tag_t;
|
||||
*/
|
||||
#define MCA_BTL_FLAGS_SIGNALED 0x4000
|
||||
|
||||
|
||||
/** The BTL supports network atomic operations */
|
||||
#define MCA_BTL_FLAGS_ATOMIC_OPS 0x8000
|
||||
|
||||
/* Default exclusivity levels */
|
||||
#define MCA_BTL_EXCLUSIVITY_HIGH (64*1024) /* internal loopback */
|
||||
#define MCA_BTL_EXCLUSIVITY_DEFAULT 1024 /* GM/IB/etc. */
|
||||
@ -264,6 +268,32 @@ enum {
|
||||
#endif
|
||||
};
|
||||
|
||||
/**
 * Supported atomic operations.
 *
 * Bit flags describing which atomic operations a btl supports; they are
 * stored in the btl_atomic_flags field of the btl module.
 */
enum {
    /** The btl supports atomic add */
    MCA_BTL_ATOMIC_SUPPORTS_ADD = 0x00000001,
    /** The btl supports atomic bitwise and */
    MCA_BTL_ATOMIC_SUPPORTS_AND = 0x00000200,
    /** The btl supports atomic bitwise or */
    MCA_BTL_ATOMIC_SUPPORTS_OR = 0x00000400,
    /** The btl supports atomic bitwise exclusive or */
    MCA_BTL_ATOMIC_SUPPORTS_XOR = 0x00000800,
};

/**
 * The btl supports atomic compare-and-swap.
 *
 * Kept outside the enum on purpose: 0x80000000 is not representable as an
 * int, and ISO C requires every enumeration constant to fit in an int.  An
 * unsigned macro keeps the same bit value without relying on a compiler
 * extension.
 */
#define MCA_BTL_ATOMIC_SUPPORTS_CSWAP 0x80000000u
|
||||
|
||||
/**
 * Selector for the operation applied to the remote location by a btl atomic
 * call.  In every case the remote value is combined with the caller's operand
 * and the result is written back to the remote address.
 */
typedef enum mca_btl_base_atomic_op_t {
    MCA_BTL_ATOMIC_ADD = 0x0001, /**< (*remote_address) = (*remote_address) + operand */
    MCA_BTL_ATOMIC_AND = 0x0011, /**< (*remote_address) = (*remote_address) & operand */
    MCA_BTL_ATOMIC_OR  = 0x0012, /**< (*remote_address) = (*remote_address) | operand */
    MCA_BTL_ATOMIC_XOR = 0x0014, /**< (*remote_address) = (*remote_address) ^ operand */
} mca_btl_base_atomic_op_t;
|
||||
|
||||
/**
|
||||
* Asynchronous callback function on completion of an operation.
|
||||
* Completion Semantics: The descriptor can be reused or returned to the
|
||||
@ -840,12 +870,6 @@ typedef int (*mca_btl_base_module_sendi_fn_t)(
|
||||
* until all outstanding operations on that handle
|
||||
* have been completed.
|
||||
*
|
||||
* BTLs that do not have the MCA_BTL_FLAGS_RDMA_MATCHED flag set
|
||||
* allow multiple concurrent put operations on the same descriptor.
|
||||
* BTLs that do have the MCA_BTL_FLAGS_RDMA_MATCHED flag set require
|
||||
* a corresponding prepare_src/dst call for each put operation and
|
||||
* therefore prohibit multiple concurrent put operations.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param local_address (IN) Local address to put from (registered)
|
||||
@ -874,27 +898,6 @@ typedef int (*mca_btl_base_module_put_fn_t) (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
|
||||
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
|
||||
|
||||
/**
|
||||
* Initiate an asynchronous get.
|
||||
*
|
||||
* Completion Semantics: the descriptor has been queued for a get operation
|
||||
* the BTL now controls the descriptor until local
|
||||
* completion callback is made on the descriptor
|
||||
*
|
||||
* BTLs that do not have the MCA_BTL_FLAGS_RDMA_MATCHED flag set
|
||||
* allow multiple concurrent get operations on the same descriptor.
|
||||
* BTLs that do have the MCA_BTL_FLAGS_RDMA_MATCHED flag set require
|
||||
* a corresponding prepare_src/dst call for each get operation and
|
||||
* therefore prohibit multiple concurrent get operations.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param descriptor (IN) Description of the data to be transferred
|
||||
*
|
||||
* @retval OPAL_SUCCESS The descriptor was successfully queued for a get
|
||||
* @retval OPAL_ERROR The descriptor was NOT successfully queued for a get
|
||||
*
|
||||
*/
|
||||
/**
|
||||
* Initiate an asynchronous get.
|
||||
* Completion Semantics: if this function returns a 1 then the operation
|
||||
@ -904,12 +907,6 @@ typedef int (*mca_btl_base_module_put_fn_t) (struct mca_btl_base_module_t *btl,
|
||||
* until all outstanding operations on that handle
|
||||
* have been completed.
|
||||
*
|
||||
* BTLs that do not have the MCA_BTL_FLAGS_RDMA_MATCHED flag set
|
||||
* allow multiple concurrent put operations on the same descriptor.
|
||||
* BTLs that do have the MCA_BTL_FLAGS_RDMA_MATCHED flag set require
|
||||
* a corresponding prepare_src/dst call for each put operation and
|
||||
* therefore prohibit multiple concurrent put operations.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param local_address (IN) Local address to put from (registered)
|
||||
@ -938,6 +935,139 @@ typedef int (*mca_btl_base_module_get_fn_t) (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
|
||||
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
|
||||
|
||||
/**
|
||||
* Initiate an asynchronous atomic operation.
|
||||
* Completion Semantics: if this function returns a 1 then the operation
|
||||
* is complete. a return of OPAL_SUCCESS indicates
|
||||
* the atomic operation has been queued with the
|
||||
* network.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param remote_address (IN) Remote address to perform the operation on (registered remotely)
|
||||
* @param remote_handle (IN) Remote registration handle for region containing
|
||||
* (remote_address, remote_address + 8)
|
||||
* @param op (IN) Operation to perform
|
||||
* @param operand (IN) Operand for the operation
|
||||
* @param flags (IN) Flags for this atomic operation
|
||||
* @param order (IN) Ordering
|
||||
* @param cbfunc (IN) Function to call on completion (if queued)
|
||||
* @param cbcontext (IN) Context for the callback
|
||||
* @param cbdata (IN) Data for callback
|
||||
*
|
||||
* @retval OPAL_SUCCESS The operation was successfully queued
|
||||
* @retval 1 The operation is complete
|
||||
* @retval OPAL_ERROR The operation was NOT successfully queued
|
||||
* @retval OPAL_ERR_OUT_OF_RESOURCE Insufficient resources to queue the atomic
|
||||
* operation. Try again later
|
||||
* @retval OPAL_ERR_NOT_AVAILABLE Atomic operation can not be performed due to
|
||||
* alignment restrictions or the operation {op} is not supported
|
||||
* by the hardware.
|
||||
*
|
||||
* After the operation is complete the remote address specified by {remote_address} and
|
||||
* {remote_handle} will be updated with (*remote_address) = (*remote_address) op operand.
|
||||
* The btl will guarantee consistency of atomic operations performed via the btl. Note,
|
||||
* however, that not all btls will provide consistency between btl atomic operations and
|
||||
* cpu atomics.
|
||||
*/
|
||||
typedef int (*mca_btl_base_module_atomic_op64_fn_t) (
    struct mca_btl_base_module_t *btl,
    struct mca_btl_base_endpoint_t *endpoint,
    /* remote target of the operation */
    uint64_t remote_address,
    struct mca_btl_base_registration_handle_t *remote_handle,
    /* operation and its operand */
    mca_btl_base_atomic_op_t op,
    uint64_t operand,
    int flags,
    int order,
    /* completion callback and its arguments */
    mca_btl_base_rdma_completion_fn_t cbfunc,
    void *cbcontext,
    void *cbdata);
|
||||
|
||||
/**
|
||||
* Initiate an asynchronous fetching atomic operation.
|
||||
* Completion Semantics: if this function returns a 1 then the operation
|
||||
* is complete. a return of OPAL_SUCCESS indicates
|
||||
* the atomic operation has been queued with the
|
||||
* network.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param local_address (OUT) Local address to store the result in
|
||||
* @param remote_address (IN) Remote address to perform the operation on (registered remotely)
|
||||
* @param local_handle (IN) Local registration handle for region containing
|
||||
* (local_address, local_address + 8)
|
||||
* @param remote_handle (IN) Remote registration handle for region containing
|
||||
* (remote_address, remote_address + 8)
|
||||
* @param op (IN) Operation to perform
|
||||
* @param operand (IN) Operand for the operation
|
||||
* @param flags (IN) Flags for this atomic operation
|
||||
* @param order (IN) Ordering
|
||||
* @param cbfunc (IN) Function to call on completion (if queued)
|
||||
* @param cbcontext (IN) Context for the callback
|
||||
* @param cbdata (IN) Data for callback
|
||||
*
|
||||
* @retval OPAL_SUCCESS The operation was successfully queued
|
||||
* @retval 1 The operation is complete
|
||||
* @retval OPAL_ERROR The operation was NOT successfully queued
|
||||
* @retval OPAL_ERR_OUT_OF_RESOURCE Insufficient resources to queue the atomic
|
||||
* operation. Try again later
|
||||
* @retval OPAL_ERR_NOT_AVAILABLE Atomic operation can not be performed due to
|
||||
* alignment restrictions or the operation {op} is not supported
|
||||
* by the hardware.
|
||||
*
|
||||
* After the operation is complete the remote address specified by {remote_address} and
|
||||
* {remote_handle} will be updated with (*remote_address) = (*remote_address) op operand.
|
||||
* {local_address} will be updated with the previous value stored in {remote_address}.
|
||||
* The btl will guarantee consistency of atomic operations performed via the btl. Note,
|
||||
* however, that not all btls will provide consistency between btl atomic operations and
|
||||
* cpu atomics.
|
||||
*/
|
||||
typedef int (*mca_btl_base_module_atomic_fop64_fn_t) (
    struct mca_btl_base_module_t *btl,
    struct mca_btl_base_endpoint_t *endpoint,
    /* local result buffer and remote target */
    void *local_address,
    uint64_t remote_address,
    struct mca_btl_base_registration_handle_t *local_handle,
    struct mca_btl_base_registration_handle_t *remote_handle,
    /* operation and its operand */
    mca_btl_base_atomic_op_t op,
    uint64_t operand,
    int flags,
    int order,
    /* completion callback and its arguments */
    mca_btl_base_rdma_completion_fn_t cbfunc,
    void *cbcontext,
    void *cbdata);
|
||||
|
||||
/**
|
||||
* Initiate an asynchronous compare and swap operation.
|
||||
* Completion Semantics: if this function returns a 1 then the operation
|
||||
* is complete. a return of OPAL_SUCCESS indicates
|
||||
* the atomic operation has been queued with the
|
||||
* network.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param local_address (OUT) Local address to store the result in
|
||||
* @param remote_address (IN) Remote address to perform the operation on (registered remotely)
|
||||
* @param local_handle (IN) Local registration handle for region containing
|
||||
* (local_address, local_address + 8)
|
||||
* @param remote_handle (IN) Remote registration handle for region containing
|
||||
* (remote_address, remote_address + 8)
|
||||
* @param compare (IN) Value to compare against the contents of remote_address
|
||||
* @param value (IN) Value to store on success
|
||||
* @param flags (IN) Flags for this atomic operation
|
||||
* @param order (IN) Ordering
|
||||
* @param cbfunc (IN) Function to call on completion (if queued)
|
||||
* @param cbcontext (IN) Context for the callback
|
||||
* @param cbdata (IN) Data for callback
|
||||
*
|
||||
* @retval OPAL_SUCCESS The operation was successfully queued
|
||||
* @retval 1 The operation is complete
|
||||
* @retval OPAL_ERROR The operation was NOT successfully queued
|
||||
* @retval OPAL_ERR_OUT_OF_RESOURCE Insufficient resources to queue the atomic
|
||||
* operation. Try again later
|
||||
* @retval OPAL_ERR_NOT_AVAILABLE Atomic operation can not be performed due to
|
||||
* alignment restrictions or compare-and-swap is not supported
|
||||
* by the hardware.
|
||||
*
|
||||
* After the operation is complete the remote address specified by {remote_address} and
|
||||
* {remote_handle} will be updated with {value} if *remote_address == compare.
|
||||
* {local_address} will be updated with the previous value stored in {remote_address}.
|
||||
* The btl will guarantee consistency of atomic operations performed via the btl. Note,
|
||||
* however, that not all btls will provide consistency between btl atomic operations and
|
||||
* cpu atomics.
|
||||
*/
|
||||
typedef int (*mca_btl_base_module_atomic_cswap_fn_t) (
    struct mca_btl_base_module_t *btl,
    struct mca_btl_base_endpoint_t *endpoint,
    /* local result buffer and remote target */
    void *local_address,
    uint64_t remote_address,
    struct mca_btl_base_registration_handle_t *local_handle,
    struct mca_btl_base_registration_handle_t *remote_handle,
    /* comparison value and replacement value */
    uint64_t compare,
    uint64_t value,
    int flags,
    int order,
    /* completion callback and its arguments */
    mca_btl_base_rdma_completion_fn_t cbfunc,
    void *cbcontext,
    void *cbdata);
|
||||
|
||||
/**
|
||||
* Diagnostic dump of btl state.
|
||||
*
|
||||
@ -976,6 +1106,7 @@ struct mca_btl_base_module_t {
|
||||
uint32_t btl_latency; /**< relative ranking of latency used to prioritize btls */
|
||||
uint32_t btl_bandwidth; /**< bandwidth (Mbytes/sec) supported by each endpoint */
|
||||
uint32_t btl_flags; /**< flags (put/get...) */
|
||||
uint32_t btl_atomic_flags; /**< atomic operations supported (add, and, xor, etc) */
|
||||
size_t btl_registration_handle_size; /**< size of the BTLs registration handles */
|
||||
|
||||
/* One-sided limitations (0 for no alignment, SIZE_MAX for no limit ) */
|
||||
@ -999,6 +1130,11 @@ struct mca_btl_base_module_t {
|
||||
mca_btl_base_module_get_fn_t btl_get;
|
||||
mca_btl_base_module_dump_fn_t btl_dump;
|
||||
|
||||
/* atomic operations */
|
||||
mca_btl_base_module_atomic_op64_fn_t btl_atomic_op;
|
||||
mca_btl_base_module_atomic_fop64_fn_t btl_atomic_fop;
|
||||
mca_btl_base_module_atomic_cswap_fn_t btl_atomic_cswap;
|
||||
|
||||
/* new memory registration functions */
|
||||
mca_btl_base_module_register_mem_fn_t btl_register_mem; /**< memory registration function (NULL if not needed) */
|
||||
mca_btl_base_module_deregister_mem_fn_t btl_deregister_mem; /**< memory deregistration function (NULL if not needed) */
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user