1
1
openmpi/ompi/mca/osc/rdma/osc_rdma_request.h
Nathan Hjelm d8df9d414d osc/rdma: add true RDMA one-sided component
This commit adds support for performing one-sided operations over
supported hardware (currently Infiniband and Cray Gemini/Aries). This
component is still undergoing active development.

Current features:

 - Use network atomic operations (fadd, cswap) for implementing
   locking and PSCW synchronization.

 - Aggregate small contiguous puts.

 - Reduced memory footprint by storing window data (pointer, keys,
   etc) at the lowest rank on each node. The data is fetched as each
   process needs to communicate with a new peer. This is a trade-off
   between the performance of the first operation on a peer and the
   memory utilization of a window.

TODO:

 - Add support for the accumulate_ops info key. If it is known that
   the same op or same op/no op is used it may be possible to use
   hardware atomics for fetch-and-op and compare-and-swap.

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
2015-09-16 15:01:33 -06:00

110 строки
3.8 KiB
C

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_OSC_RDMA_REQUEST_H
#define OMPI_OSC_RDMA_REQUEST_H
#include "osc_rdma.h"
/**
 * Kind of operation a request represents; stored in the `type` member of
 * struct ompi_osc_rdma_request_t.
 *
 * The enumerators rely on implicit numbering (first value is 0, each
 * following value is one greater), so their order must not be changed.
 */
typedef enum ompi_osc_rdma_request_type_t {
    OMPI_OSC_RDMA_TYPE_GET,      /* presumably a get -- confirm against users */
    OMPI_OSC_RDMA_TYPE_PUT,      /* presumably a put */
    OMPI_OSC_RDMA_TYPE_RDMA,     /* NOTE(review): exact meaning not visible in this header */
    OMPI_OSC_RDMA_TYPE_ACC,      /* presumably an accumulate */
    OMPI_OSC_RDMA_TYPE_GET_ACC,  /* presumably a get-accumulate */
    OMPI_OSC_RDMA_TYPE_CSWAP     /* presumably a compare-and-swap */
} ompi_osc_rdma_request_type_t;
/**
 * Request object of the osc/rdma component.
 *
 * Instances are taken from and returned to the component's request free list
 * by OMPI_OSC_RDMA_REQUEST_ALLOC / OMPI_OSC_RDMA_REQUEST_RETURN. Only the
 * members noted as "set by REQUEST_ALLOC" are initialized by that macro; the
 * remaining members keep whatever value they had before.
 */
struct ompi_osc_rdma_request_t {
/* base MPI request; this is what is handed to OMPI_REQUEST_INIT/FINI and
 * ompi_request_complete. NOTE(review): the free-list item is cast directly
 * to this struct, so this presumably has to stay the first member. */
ompi_request_t super;
/* peer this request is associated with (set by REQUEST_ALLOC) */
ompi_osc_rdma_peer_t *peer;
/* kind of operation this request represents */
ompi_osc_rdma_request_type_t type;
/* presumably the origin buffer description (address, count, datatype) --
 * TODO confirm against the code that fills these in */
void *origin_addr;
int origin_count;
struct ompi_datatype_t *origin_dt;
/* presumably the result buffer description (address, count, datatype) --
 * TODO confirm */
void *result_addr;
int result_count;
struct ompi_datatype_t *result_dt;
/* presumably the compare buffer for compare-and-swap -- TODO confirm */
const void *compare_addr;
/* presumably the reduction operation for accumulate-style requests */
ompi_op_t *op;
/* module this request belongs to (set by REQUEST_ALLOC) */
ompi_osc_rdma_module_t *module;
/* reset to 0 by REQUEST_ALLOC; presumably counts sub-operations still in
 * flight -- TODO confirm */
int32_t outstanding_requests;
/* reset to false by REQUEST_ALLOC. When true, completing the request returns
 * it to the free list instead of marking it complete at the MPI level (see
 * ompi_osc_rdma_request_complete). */
bool internal;
/* NOTE(review): meaning of the following members is not visible in this
 * header; see the users of the request */
ptrdiff_t offset;
size_t len;
void *ctx;
void *frag;
uint64_t target_address;
/* reset to NULL by REQUEST_ALLOC; presumably links a sub-request to the
 * request it was split from -- TODO confirm */
struct ompi_osc_rdma_request_t *parent_request;
/* used for non-contiguous get accumulate operations */
opal_convertor_t convertor;
/** synchronization object */
struct ompi_osc_rdma_sync_t *sync;
};
typedef struct ompi_osc_rdma_request_t ompi_osc_rdma_request_t;
/* class declaration for the request type (OBJ_CLASS_DECLARATION comes from
 * outside this header) */
OBJ_CLASS_DECLARATION(ompi_osc_rdma_request_t);
/**
 * Take a request from the component's request free list and initialize it.
 *
 * REQUEST_ALLOC is only called from "top-level" functions (rdma_rput,
 * rdma_rget, etc.), so it's ok to spin here: while the free list is empty,
 * ompi_osc_rdma_progress() is called on the module and the get is retried.
 *
 * @param rmodule  module the request belongs to; its `win` member is stored
 *                 in the request's req_mpi_object
 * @param rpeer    peer stored in the request
 * @param req      lvalue of type ompi_osc_rdma_request_t * that receives the
 *                 initialized request
 *
 * The window is read from the rmodule argument itself; the macro does not
 * depend on any particular variable name being in scope at the call site.
 * Every argument is expanded more than once, so arguments must be free of
 * side effects.
 */
#define OMPI_OSC_RDMA_REQUEST_ALLOC(rmodule, rpeer, req)                        \
    do {                                                                        \
        opal_free_list_item_t *osc_rdma_alloc_item_;                            \
        do {                                                                    \
            osc_rdma_alloc_item_ =                                              \
                opal_free_list_get (&mca_osc_rdma_component.requests);          \
            if (NULL == osc_rdma_alloc_item_) {                                 \
                ompi_osc_rdma_progress (rmodule);                               \
            }                                                                   \
        } while (NULL == osc_rdma_alloc_item_);                                 \
        (req) = (ompi_osc_rdma_request_t *) osc_rdma_alloc_item_;               \
        OMPI_REQUEST_INIT(&(req)->super, false);                                \
        (req)->super.req_mpi_object.win = (rmodule)->win;                       \
        (req)->super.req_complete = false;                                      \
        (req)->super.req_state = OMPI_REQUEST_ACTIVE;                           \
        (req)->module = (rmodule);                                              \
        (req)->internal = false;                                                \
        (req)->outstanding_requests = 0;                                        \
        (req)->parent_request = NULL;                                           \
        (req)->peer = (rpeer);                                                  \
    } while (0)
/**
 * Release a request: finalize its embedded MPI request with
 * OMPI_REQUEST_FINI, then hand the object back to the component's request
 * free list.
 *
 * @param req  pointer to the request; expanded twice, so it must be free of
 *             side effects
 */
#define OMPI_OSC_RDMA_REQUEST_RETURN(req)                                  \
    do {                                                                   \
        OMPI_REQUEST_FINI(&(req)->super);                                  \
        opal_free_list_return (&mca_osc_rdma_component.requests,           \
                               (opal_free_list_item_t *) (req));           \
    } while (0)
/**
 * Finish a request.
 *
 * Internal requests are simply handed back to the request free list; in that
 * case mpi_error is not recorded anywhere. For all other requests the error
 * code is stored in the request status and the request is marked complete at
 * the MPI level.
 *
 * @param request    request to finish
 * @param mpi_error  error code stored in the status of non-internal requests
 */
static inline void ompi_osc_rdma_request_complete (ompi_osc_rdma_request_t *request, int mpi_error)
{
    if (request->internal) {
        /* nobody waits on an internal request: recycle it right away */
        OMPI_OSC_RDMA_REQUEST_RETURN (request);
        return;
    }

    request->super.req_status.MPI_ERROR = mpi_error;

    /* mark the request complete at the mpi level */
    ompi_request_complete (&request->super, true);
}
#endif /* OMPI_OSC_RDMA_REQUEST_H */