1
1

libfabric: sync to upstream libfabric github

Bring down the latest from the libfabric github, as of
9d051567c8eb7adc2af89516f94c7d0539152948.
This commit is contained in:
Jeff Squyres 2014-12-03 14:25:39 -08:00
parent 7cd4832a0d
commit c4e8d67515
126 changed files: 8133 additions and 7582 deletions

View file

@ -248,127 +248,125 @@ nodist_rdmainclude_HEADERS = \
$(top_srcdir)/prov/$(PROVIDER_DIRECT)/include/rdma/fi_direct_trigger.h
endif HAVE_DIRECT
man_MANS = \
man/fabric.7 \
man/fi_accept.3 \
man/fi_alias.3 \
man/fi_atomic.3 \
man/fi_atomicv.3 \
man/fi_atomicto.3 \
man/fi_atomicmsg.3 \
man/fi_atomic_valid.3 \
man/fi_av.3 \
man/fi_av_bind.3 \
man/fi_av_insert.3 \
man/fi_av_insertsvc.3 \
man/fi_av_lookup.3 \
man/fi_av_open.3 \
man/fi_av_remove.3 \
man/fi_av_straddr.3 \
man/fi_cancel.3 \
man/fi_close.3 \
man/fi_cm.3 \
man/fi_compare_atomic.3 \
man/fi_compare_atomicv.3 \
man/fi_compare_atomicto.3 \
man/fi_compare_atomicmsg.3 \
man/fi_compare_atomic_valid.3 \
man/fi_connect.3 \
man/fi_cntr.3 \
man/fi_cntr_open.3 \
man/fi_cntr_read.3 \
man/fi_cntr_add.3 \
man/fi_cntr_set.3 \
man/fi_cntr_wait.3 \
man/fi_cq.3 \
man/fi_cq_open.3 \
man/fi_cq_read.3 \
man/fi_cq_readfrom.3 \
man/fi_cq_readerr.3 \
man/fi_cq_write.3 \
man/fi_cq_sread.3 \
man/fi_cq_sreadfrom.3 \
man/fi_cq_strerror.3 \
man/fi_direct.7 \
man/fi_domain.3 \
man/fi_domain_query.3 \
man/fi_domain_bind.3 \
man/fi_dupinfo.3 \
man/fi_ep_bind.3 \
man/fi_ep_sync.3 \
man/fi_eq.3 \
man/fi_eq_open.3 \
man/fi_eq_read.3 \
man/fi_eq_readerr.3 \
man/fi_eq_write.3 \
man/fi_eq_sread.3 \
man/fi_eq_strerror.3 \
man/fi_enable.3 \
man/fi_endpoint.3 \
man/fi_fabric.3 \
man/fi_fetch_atomic.3 \
man/fi_fetch_atomicv.3 \
man/fi_fetch_atomicto.3 \
man/fi_fetch_atomicmsg.3 \
man/fi_fetch_atomic_valid.3 \
man/fi_freeinfo.3 \
man/fi_getinfo.3 \
man/fi_getname.3 \
man/fi_getopt.3 \
man/fi_getpeer.3 \
man/fi_inject.3 \
man/fi_injectto.3 \
man/fi_join.3 \
man/fi_leave.3 \
man/fi_listen.3 \
man/fi_mr.3 \
man/fi_mr_reg.3 \
man/fi_mr_regv.3 \
man/fi_mr_regattr.3 \
man/fi_mr_desc.3 \
man/fi_mr_key.3 \
man/fi_mr_bind.3 \
man/fi_msg.3 \
man/fi_open.3 \
man/fi_pendpoint.3 \
man/fi_poll.3 \
man/fi_poll_add.3 \
man/fi_poll_del.3 \
man/fi_poll_open.3 \
man/fi_recv.3 \
man/fi_recvv.3 \
man/fi_recvfrom.3 \
man/fi_recvmsg.3 \
man/fi_reject.3 \
man/fi_rma.3 \
man/fi_rx_addr.3 \
man/fi_send.3 \
man/fi_senddata.3 \
man/fi_senddatato.3 \
man/fi_sendv.3 \
man/fi_sendto.3 \
man/fi_sendmsg.3 \
man/fi_setopt.3 \
man/fi_shutdown.3 \
man/fi_tagged.3 \
man/fi_tinject.3 \
man/fi_tinjectto.3 \
man/fi_tostr.3 \
man/fi_trecv.3 \
man/fi_trecvv.3 \
man/fi_trecvfrom.3 \
man/fi_trecvmsg.3 \
man/fi_trigger.3 \
man/fi_tsearch.3 \
man/fi_tsend.3 \
man/fi_tsenddata.3 \
man/fi_tsenddatato.3 \
man/fi_tsendv.3 \
man/fi_tsendto.3 \
man/fi_tsendmsg.3 \
man/fi_version.3 \
man/fi_wait.3 \
man/fi_wait_open.3
real_man_pages = \
man/fabric.7 \
man/fi_av.3 \
man/fi_cm.3 \
man/fi_cntr.3 \
man/fi_cq.3 \
man/fi_direct.7 \
man/fi_domain.3 \
man/fi_endpoint.3 \
man/fi_eq.3 \
man/fi_fabric.3 \
man/fi_getinfo.3 \
man/fi_mr.3 \
man/fi_msg.3 \
man/fi_poll.3 \
man/fi_rma.3 \
man/fi_tagged.3 \
man/fi_trigger.3 \
man/fi_version.3
dummy_man_pages = \
man/fi_accept.3 \
man/fi_alias.3 \
man/fi_atomic_valid.3 \
man/fi_atomicmsg.3 \
man/fi_atomicv.3 \
man/fi_av_bind.3 \
man/fi_av_insert.3 \
man/fi_av_insertsvc.3 \
man/fi_av_lookup.3 \
man/fi_av_open.3 \
man/fi_av_remove.3 \
man/fi_av_straddr.3 \
man/fi_cancel.3 \
man/fi_close.3 \
man/fi_cntr_add.3 \
man/fi_cntr_open.3 \
man/fi_cntr_read.3 \
man/fi_cntr_set.3 \
man/fi_cntr_wait.3 \
man/fi_compare_atomic.3 \
man/fi_compare_atomic_valid.3 \
man/fi_compare_atomicmsg.3 \
man/fi_compare_atomicv.3 \
man/fi_connect.3 \
man/fi_cq_open.3 \
man/fi_cq_read.3 \
man/fi_cq_readerr.3 \
man/fi_cq_readfrom.3 \
man/fi_cq_sread.3 \
man/fi_cq_sreadfrom.3 \
man/fi_cq_strerror.3 \
man/fi_cq_write.3 \
man/fi_domain_bind.3 \
man/fi_domain_query.3 \
man/fi_dupinfo.3 \
man/fi_enable.3 \
man/fi_ep_bind.3 \
man/fi_eq_open.3 \
man/fi_eq_read.3 \
man/fi_eq_readerr.3 \
man/fi_eq_sread.3 \
man/fi_eq_strerror.3 \
man/fi_eq_write.3 \
man/fi_fetch_atomic.3 \
man/fi_fetch_atomic_valid.3 \
man/fi_fetch_atomicmsg.3 \
man/fi_fetch_atomicv.3 \
man/fi_freeinfo.3 \
man/fi_getname.3 \
man/fi_getopt.3 \
man/fi_getpeer.3 \
man/fi_inject.3 \
man/fi_inject_atomic.3 \
man/fi_join.3 \
man/fi_leave.3 \
man/fi_listen.3 \
man/fi_mr_bind.3 \
man/fi_mr_desc.3 \
man/fi_mr_key.3 \
man/fi_mr_reg.3 \
man/fi_mr_regattr.3 \
man/fi_mr_regv.3 \
man/fi_open.3 \
man/fi_pendpoint.3 \
man/fi_poll_add.3 \
man/fi_poll_del.3 \
man/fi_poll_open.3 \
man/fi_recv.3 \
man/fi_recvmsg.3 \
man/fi_recvv.3 \
man/fi_reject.3 \
man/fi_rx_addr.3 \
man/fi_send.3 \
man/fi_senddata.3 \
man/fi_sendmsg.3 \
man/fi_sendv.3 \
man/fi_setopt.3 \
man/fi_shutdown.3 \
man/fi_strerror.3 \
man/fi_tinject.3 \
man/fi_tostr.3 \
man/fi_trecv.3 \
man/fi_trecvmsg.3 \
man/fi_trecvv.3 \
man/fi_tsearch.3 \
man/fi_tsend.3 \
man/fi_tsenddata.3 \
man/fi_tsendmsg.3 \
man/fi_tsendv.3 \
man/fi_wait.3 \
man/fi_wait_open.3
man_MANS = $(real_man_pages) $(dummy_man_pages)
nroff:
@for file in $(real_man_pages); do \
config/md2nroff.pl --source=$$file.md; \
done
EXTRA_DIST = libfabric.map libfabric.spec.in config/distscript.pl $(man_MANS)

View file

@ -1,7 +1,7 @@
This README is for userspace RDMA fabric library.
Version Libfabric v0.0.2
Released on 2014-11-11
Released on 2014-12-03
Building
========

View file

@ -132,13 +132,24 @@ AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script,
AM_CONDITIONAL(HAVE_LD_VERSION_SCRIPT, test "$ac_cv_version_script" = "yes")
dnl Disable symbol versioning with icc + ipo, with it ipo is disabled by icc.
dnl The gcc equivalent ipo (-fwhole-program) seems to work fine.
AS_IF([case "$CFLAGS" in
*-ipo*) true ;;
*) false ;;
esac],
[AC_MSG_NOTICE([disabling symbol versioning support with -ipo CFLAG])],
[
AC_CACHE_CHECK(for .symver assembler support, ac_cv_asm_symver_support,
[AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]],
[[asm("symbol:\n.symver symbol, api@ABI\n");]])],
[ac_cv_asm_symver_support=yes],
[ac_cv_asm_symver_support=no])])
if test $ac_cv_asm_symver_support = yes; then
]) dnl AS_IF
if test x$ac_cv_asm_symver_support = xyes; then
AC_DEFINE([HAVE_SYMVER_SUPPORT], 1, [assembler has .symver support])
fi

View file

@ -185,7 +185,6 @@ int fi_poll_fd(int fd, int timeout);
int fi_wait_cond(pthread_cond_t *cond, pthread_mutex_t *mut, int timeout);
struct fi_info *fi_allocinfo_internal(void);
void fi_freeinfo_internal(struct fi_info *info);
int fi_sockaddr_len(struct sockaddr *addr);
size_t fi_datatype_size(enum fi_datatype datatype);

View file

@ -48,23 +48,21 @@ extern "C" {
/*
static struct fi_ops X = {
.size = sizeof(struct fi_ops);,
.size = sizeof(struct fi_ops),
.close = X,
.bind = fi_no_bind,
.sync = fi_no_sync,
.control = fi_no_control,
.ops_open = fi_no_ops_open,
};
*/
int fi_no_bind(struct fid *fid, struct fid *bfid, uint64_t flags);
int fi_no_sync(struct fid *fid, uint64_t flags, void *context);
int fi_no_control(struct fid *fid, int command, void *arg);
int fi_no_ops_open(struct fid *fid, const char *name,
uint64_t flags, void **ops, void *context);
/*
static struct fi_ops_fabric X = {
.size = sizeof(struct fi_ops_fabric);,
.size = sizeof(struct fi_ops_fabric),
.domain = fi_no_domain,
.endpoint = fi_no_pendpoint,
.eq_open = fi_no_eq_open,
@ -79,20 +77,16 @@ int fi_no_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr,
/*
static struct fi_ops_atomic X = {
.size = sizeof(struct fi_ops_atomic);,
.size = sizeof(struct fi_ops_atomic),
.write = fi_no_atomic_write,
.writev = fi_no_atomic_writev,
.writeto = fi_no_atomic_writeto,
.writemsg = fi_no_atomic_writemsg,
.inject = fi_no_atomic_inject,
.injectto = fi_no_atomic_injectto,
.readwrite = fi_no_atomic_readwrite,
.readwritev = fi_no_atomic_readwritev,
.readwriteto = fi_no_atomic_readwriteto,
.readwritemsg = fi_no_atomic_readwritemsg,
.compwrite = fi_no_atomic_compwrite,
.compwritev = fi_no_atomic_compwritev,
.compwriteto = fi_no_atomic_compwriteto,
.compwritemsg = fi_no_atomic_compwritemsg,
.writevalid = fi_no_atomic_writevalid,
.readwritevalid = fi_no_atomic_readwritevalid,
@ -101,40 +95,26 @@ static struct fi_ops_atomic X = {
*/
ssize_t fi_no_atomic_write(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
uint64_t addr, uint64_t key,
fi_addr_t dest_addr, uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t fi_no_atomic_writev(struct fid_ep *ep,
const struct fi_ioc *iov, void **desc, size_t count,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t fi_no_atomic_writeto(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
fi_addr_t dest_addr, uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t fi_no_atomic_writemsg(struct fid_ep *ep,
const struct fi_msg_atomic *msg, uint64_t flags);
ssize_t fi_no_atomic_inject(struct fid_ep *ep, const void *buf, size_t count,
uint64_t addr, uint64_t key, enum fi_datatype datatype,
enum fi_op op);
ssize_t fi_no_atomic_injectto(struct fid_ep *ep, const void *buf, size_t count,
fi_addr_t dest_addr, uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op);
ssize_t fi_no_atomic_readwrite(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
void *result, void *result_desc,
uint64_t addr, uint64_t key,
fi_addr_t dest_addr, uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t fi_no_atomic_readwritev(struct fid_ep *ep,
const struct fi_ioc *iov, void **desc, size_t count,
struct fi_ioc *resultv, void **result_desc, size_t result_count,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t fi_no_atomic_readwriteto(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
void *result, void *result_desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
fi_addr_t dest_addr, uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t fi_no_atomic_readwritemsg(struct fid_ep *ep,
const struct fi_msg_atomic *msg,
@ -144,20 +124,13 @@ ssize_t fi_no_atomic_compwrite(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
const void *compare, void *compare_desc,
void *result, void *result_desc,
uint64_t addr, uint64_t key,
fi_addr_t dest_addr, uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t fi_no_atomic_compwritev(struct fid_ep *ep,
const struct fi_ioc *iov, void **desc, size_t count,
const struct fi_ioc *comparev, void **compare_desc, size_t compare_count,
struct fi_ioc *resultv, void **result_desc, size_t result_count,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t fi_no_atomic_compwriteto(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
const void *compare, void *compare_desc,
void *result, void *result_desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
fi_addr_t dest_addr, uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t fi_no_atomic_compwritemsg(struct fid_ep *ep,
const struct fi_msg_atomic *msg,
@ -173,7 +146,7 @@ int fi_no_atomic_compwritevalid(struct fid_ep *ep,
/*
static struct fi_ops_cm X = {
.size = sizeof(struct fi_ops_cm);,
.size = sizeof(struct fi_ops_cm),
.getname = X,
.getpeer = fi_no_getpeer,
.connect = fi_no_connect,
@ -201,7 +174,7 @@ int fi_no_leave(struct fid_ep *ep, void *addr, fi_addr_t fi_addr,
/*
static struct fi_ops_av X = {
.size = sizeof(struct fi_ops_av);,
.size = sizeof(struct fi_ops_av),
.insert = X,
.insertsvc = X,
.insertsym = X,
@ -213,13 +186,15 @@ static struct fi_ops_av X = {
/*
static struct fi_ops_domain X = {
.size = sizeof(struct fi_ops_domain);,
.size = sizeof(struct fi_ops_domain),
.av_open = fi_no_av_open,
.cq_open = fi_no_cq_open,
.endpoint = fi_no_endpoint,
.cntr_open = fi_no_cntr_open,
.wait_open = fi_no_wait_open,
.poll_open = fi_no_poll_open,
.stx_ctx = fi_no_stx_context,
.srx_ctx = fi_no_srx_context,
};
*/
int fi_no_av_open(struct fid_domain *domain, struct fi_av_attr *attr,
@ -234,10 +209,14 @@ int fi_no_wait_open(struct fid_domain *domain, struct fi_wait_attr *attr,
struct fid_wait **waitset);
int fi_no_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr,
struct fid_poll **pollset);
int fi_no_stx_context(struct fid_domain *domain, struct fi_tx_ctx_attr *attr,
struct fid_stx **stx, void *context);
int fi_no_srx_context(struct fid_domain *domain, struct fi_rx_ctx_attr *attr,
struct fid_ep **rx_ep, void *context);
/*
static struct fi_ops_mr X = {
.size = sizeof(struct fi_ops_mr);,
.size = sizeof(struct fi_ops_mr),
.reg = fi_no_mr_reg,
.regv = fi_no_mr_regv,
.regattr = fi_no_mr_regattr,
@ -255,7 +234,7 @@ int fi_no_mr_regattr(struct fid_domain *domain, const struct fi_mr_attr *attr,
/*
static struct fi_ops_ep X = {
.size = sizeof(struct fi_ops_ep);,
.size = sizeof(struct fi_ops_ep),
.enable = fi_no_enable,
.cancel = fi_no_cancel,
.getopt = fi_no_getopt,
@ -270,71 +249,60 @@ int fi_no_getopt(fid_t fid, int level, int optname,
void *optval, size_t *optlen);
int fi_no_setopt(fid_t fid, int level, int optname,
const void *optval, size_t optlen);
int fi_no_tx_ctx(struct fid_ep *ep, int index,
int fi_no_tx_ctx(struct fid_sep *sep, int index,
struct fi_tx_ctx_attr *attr, struct fid_ep **tx_ep,
void *context);
int fi_no_rx_ctx(struct fid_ep *ep, int index,
int fi_no_rx_ctx(struct fid_sep *sep, int index,
struct fi_rx_ctx_attr *attr, struct fid_ep **rx_ep,
void *context);
/*
static struct fi_ops_msg X = {
.size = sizeof(struct fi_ops_msg);,
.size = sizeof(struct fi_ops_msg),
.recv = fi_no_msg_recv,
.recvv = fi_no_msg_recvv,
.recvfrom = fi_no_msg_recvfrom,
.recvmsg = fi_no_msg_recvmsg,
.send = fi_no_msg_send,
.sendv = fi_no_msg_sendv,
.sendto = fi_no_msg_sendto,
.sendmsg = fi_no_msg_sendmsg,
.inject = fi_no_msg_inject,
.injectto = fi_no_msg_injectto,
.senddata = fi_no_msg_senddata,
.senddatato = fi_no_msg_senddatato,
};
*/
ssize_t fi_no_msg_recv(struct fid_ep *ep, void *buf, size_t len, void *desc,
void *context);
ssize_t fi_no_msg_recvv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, void *context);
ssize_t fi_no_msg_recvfrom(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr, void *context);
ssize_t fi_no_msg_recvv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, fi_addr_t src_addr, void *context);
ssize_t fi_no_msg_recvmsg(struct fid_ep *ep, const struct fi_msg *msg,
uint64_t flags);
ssize_t fi_no_msg_send(struct fid_ep *ep, const void *buf, size_t len, void *desc,
void *context);
ssize_t fi_no_msg_sendv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, void *context);
ssize_t fi_no_msg_sendto(struct fid_ep *ep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, void *context);
ssize_t fi_no_msg_sendv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, fi_addr_t dest_addr, void *context);
ssize_t fi_no_msg_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
uint64_t flags);
ssize_t fi_no_msg_inject(struct fid_ep *ep, const void *buf, size_t len);
ssize_t fi_no_msg_injectto(struct fid_ep *ep, const void *buf, size_t len,
ssize_t fi_no_msg_inject(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr);
ssize_t fi_no_msg_senddata(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, void *context);
ssize_t fi_no_msg_senddatato(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, fi_addr_t dest_addr, void *context);
/*
static struct fi_ops_wait X = {
.size = sizeof(struct fi_ops_wait);,
.size = sizeof(struct fi_ops_wait),
.wait = X,
};
*/
/*
static struct fi_ops_poll X = {
.size = sizeof(struct fi_ops_poll);,
.size = sizeof(struct fi_ops_poll),
.poll = X,
};
*/
/*
static struct fi_ops_eq X = {
.size = sizeof(struct fi_ops_eq);,
.size = sizeof(struct fi_ops_eq),
.read = X,
.readerr = X,
.write = fi_no_eq_write,
@ -349,7 +317,7 @@ ssize_t fi_no_eq_sread(struct fid_eq *eq, uint32_t *event,
/*
static struct fi_ops_cq X = {
.size = sizeof(struct fi_ops_cq);,
.size = sizeof(struct fi_ops_cq),
.read = X,
.readfrom = fi_no_cq_readfrom,
.readerr = X,
@ -371,7 +339,7 @@ ssize_t fi_no_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count,
/*
static struct fi_ops_cntr X = {
.size = sizeof(struct fi_ops_cntr);,
.size = sizeof(struct fi_ops_cntr),
.read = X,
.readerr = X,
.add = fi_no_cntr_add,
@ -385,91 +353,67 @@ int fi_no_cntr_wait(struct fid_cntr *cntr, uint64_t threshold, int timeout);
/*
static struct fi_ops_rma X = {
.size = sizeof(struct fi_ops_rma);,
.size = sizeof(struct fi_ops_rma),
.read = fi_no_rma_read,
.readv = fi_no_rma_readv,
.readfrom = fi_no_rma_readfrom,
.readmsg = fi_no_rma_readmsg,
.write = fi_no_rma_write,
.writev = fi_no_rma_writev,
.writeto = fi_no_rma_writeto,
.writemsg = fi_no_rma_writemsg,
.inject = fi_no_rma_inject,
.injectto = fi_no_rma_injectto,
.writedata = fi_no_rma_writedata,
.writedatato = fi_no_rma_writedatato,
};
*/
ssize_t fi_no_rma_read(struct fid_ep *ep, void *buf, size_t len, void *desc,
uint64_t addr, uint64_t key, void *context);
fi_addr_t src_addr, uint64_t addr, uint64_t key, void *context);
ssize_t fi_no_rma_readv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, uint64_t addr, uint64_t key, void *context);
ssize_t fi_no_rma_readfrom(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr, uint64_t addr, uint64_t key,
size_t count, fi_addr_t src_addr, uint64_t addr, uint64_t key,
void *context);
ssize_t fi_no_rma_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg,
uint64_t flags);
ssize_t fi_no_rma_write(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t addr, uint64_t key, void *context);
fi_addr_t dest_addr, uint64_t addr, uint64_t key, void *context);
ssize_t fi_no_rma_writev(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, uint64_t addr, uint64_t key, void *context);
ssize_t fi_no_rma_writeto(struct fid_ep *ep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, uint64_t addr, uint64_t key,
size_t count, fi_addr_t dest_addr, uint64_t addr, uint64_t key,
void *context);
ssize_t fi_no_rma_writemsg(struct fid_ep *ep, const struct fi_msg_rma *msg,
uint64_t flags);
ssize_t fi_no_rma_inject(struct fid_ep *ep, const void *buf, size_t len,
uint64_t addr, uint64_t key);
ssize_t fi_no_rma_injectto(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr, uint64_t addr, uint64_t key);
ssize_t fi_no_rma_writedata(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, uint64_t addr, uint64_t key, void *context);
ssize_t fi_no_rma_writedatato(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, fi_addr_t dest_addr, uint64_t addr, uint64_t key,
void *context);
/*
static struct fi_ops_tagged X = {
.size = sizeof(struct fi_ops_tagged);,
.size = sizeof(struct fi_ops_tagged),
.recv = fi_no_tagged_recv,
.recvv = fi_no_tagged_recvv,
.recvfrom = fi_no_tagged_recvfrom,
.recvmsg = fi_no_tagged_recvmsg,
.send = fi_no_tagged_send,
.sendv = fi_no_tagged_sendv,
.sendto = fi_no_tagged_sendto,
.sendmsg = fi_no_tagged_sendmsg,
.inject = fi_no_tagged_inject,
.injectto = fi_no_tagged_injectto,
.senddata = fi_no_tagged_senddata,
.senddatato = fi_no_tagged_senddatato,
.search = fi_no_tagged_search,
};
*/
ssize_t fi_no_tagged_recv(struct fid_ep *ep, void *buf, size_t len, void *desc,
uint64_t tag, uint64_t ignore, void *context);
fi_addr_t src_addr, uint64_t tag, uint64_t ignore, void *context);
ssize_t fi_no_tagged_recvv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, uint64_t tag, uint64_t ignore, void *context);
ssize_t fi_no_tagged_recvfrom(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr,
size_t count, fi_addr_t src_addr,
uint64_t tag, uint64_t ignore, void *context);
ssize_t fi_no_tagged_recvmsg(struct fid_ep *ep, const struct fi_msg_tagged *msg,
uint64_t flags);
ssize_t fi_no_tagged_send(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t tag, void *context);
ssize_t fi_no_tagged_sendv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, uint64_t tag, void *context);
ssize_t fi_no_tagged_sendto(struct fid_ep *ep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, uint64_t tag, void *context);
ssize_t fi_no_tagged_sendv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, fi_addr_t dest_addr, uint64_t tag, void *context);
ssize_t fi_no_tagged_sendmsg(struct fid_ep *ep, const struct fi_msg_tagged *msg,
uint64_t flags);
ssize_t fi_no_tagged_inject(struct fid_ep *ep, const void *buf, size_t len,
uint64_t tag);
ssize_t fi_no_tagged_injectto(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr, uint64_t tag);
ssize_t fi_no_tagged_senddata(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, uint64_t tag, void *context);
ssize_t fi_no_tagged_senddatato(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, fi_addr_t dest_addr, uint64_t tag, void *context);
ssize_t fi_no_tagged_search(struct fid_ep *ep, uint64_t *tag, uint64_t ignore,
uint64_t flags, fi_addr_t *src_addr, size_t *len, void *context);

View file

@ -156,19 +156,20 @@ struct ringbuffd {
static inline int rbfdinit(struct ringbuffd *rbfd, size_t size)
{
int ret;
int ret, flags;
rbfd->fdrcnt = 0;
rbfd->fdwcnt = 0;
ret = rbinit(&rbfd->rb, size);
if (!ret)
if (ret)
return ret;
ret = socketpair(AF_UNIX, SOCK_STREAM, 0, rbfd->fd);
if (ret < 0)
goto err1;
ret = fcntl(rbfd->fd[RB_READ_FD], F_SETFL, O_NONBLOCK);
flags = fcntl(rbfd->fd[RB_READ_FD], F_GETFL, 0);
ret = fcntl(rbfd->fd[RB_READ_FD], F_SETFL, flags | O_NONBLOCK);
if (ret < 0)
goto err2;
@ -257,20 +258,22 @@ static inline void rbfdread(struct ringbuffd *rbfd, void *buf, size_t len)
static inline size_t rbfdsread(struct ringbuffd *rbfd, void *buf, size_t len,
int timeout)
{
size_t avail;
int ret;
size_t avail;
do {
avail = rbfdused(rbfd);
if (avail) {
len = MIN(len, avail);
rbfdread(rbfd, buf, len);
return len;
}
ret = fi_poll_fd(rbfd->fd[RB_READ_FD], timeout);
} while (!ret);
avail = rbfdused(rbfd);
if (avail) {
len = MIN(len, avail);
rbfdread(rbfd, buf, len);
return len;
}
ret = fi_poll_fd(rbfd->fd[RB_READ_FD], timeout);
if (ret == 1) {
len = MIN(len, rbfdused(rbfd));
rbfdread(rbfd, buf, len);
return len;
}
return ret;
}

View file

@ -76,6 +76,8 @@ struct fid_cq;
struct fid_cntr;
struct fid_ep;
struct fid_pep;
struct fid_stx;
struct fid_sep;
struct fid_mr;
typedef struct fid *fid_t;
@ -124,6 +126,7 @@ typedef struct fid *fid_t;
#define FI_REMOTE_CQ_DATA (1ULL << 24)
#define FI_EVENT (1ULL << 25)
#define FI_COMPLETION FI_EVENT
#define FI_REMOTE_SIGNAL (1ULL << 26)
#define FI_REMOTE_COMPLETE (1ULL << 27)
#define FI_CANCEL (1ULL << 28)
@ -150,6 +153,7 @@ enum {
};
#define FI_ADDR_NOTAVAIL UINT64_MAX
#define FI_SHARED_CONTEXT UINT64_MAX
typedef uint64_t fi_addr_t;
typedef void * fi_connreq_t;
@ -201,28 +205,27 @@ enum {
/* Mode bits */
#define FI_CONTEXT (1ULL << 0)
#define FI_LOCAL_MR (1ULL << 1)
#define FI_WRITE_NONCOHERENT (1ULL << 2)
#define FI_PROV_MR_KEY (1ULL << 3)
#define FI_MSG_PREFIX (1ULL << 4)
#define FI_PROV_MR_ATTR (1ULL << 2)
#define FI_MSG_PREFIX (1ULL << 3)
struct fi_tx_ctx_attr {
uint64_t caps;
uint64_t mode;
uint64_t op_flags;
uint64_t msg_order;
size_t inject_size;
size_t size;
size_t iov_limit;
size_t op_alignment;
};
struct fi_rx_ctx_attr {
uint64_t caps;
uint64_t mode;
uint64_t op_flags;
uint64_t msg_order;
size_t total_buffered_recv;
size_t size;
size_t iov_limit;
size_t op_alignment;
};
struct fi_ep_attr {
@ -248,13 +251,12 @@ struct fi_domain_attr {
enum fi_progress data_progress;
size_t mr_key_size;
size_t cq_data_size;
size_t cq_cnt;
size_t ep_cnt;
size_t tx_ctx_cnt;
size_t rx_ctx_cnt;
size_t max_ep_tx_ctx;
size_t max_ep_rx_ctx;
size_t op_size;
size_t iov_size;
};
struct fi_fabric_attr {
@ -287,8 +289,11 @@ enum {
FI_CLASS_FABRIC,
FI_CLASS_DOMAIN,
FI_CLASS_EP,
FI_CLASS_SEP,
FI_CLASS_RX_CTX,
FI_CLASS_SRX_CTX,
FI_CLASS_TX_CTX,
FI_CLASS_STX_CTX,
FI_CLASS_PEP,
FI_CLASS_INTERFACE,
FI_CLASS_AV,
@ -306,7 +311,6 @@ struct fi_ops {
size_t size;
int (*close)(struct fid *fid);
int (*bind)(struct fid *fid, struct fid *bfid, uint64_t flags);
int (*sync)(struct fid *fid, uint64_t flags, void *context);
int (*control)(struct fid *fid, int command, void *arg);
int (*ops_open)(struct fid *fid, const char *name,
uint64_t flags, void **ops, void *context);
@ -356,11 +360,6 @@ static inline int fi_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
return fid->ops->bind(fid, bfid, flags);
}
static inline int fi_sync(struct fid *fid, uint64_t flags, void *context)
{
return fid->ops->sync(fid, flags, context);
}
struct fi_alias {
struct fid **fid;
uint64_t flags;

View file

@ -107,39 +107,29 @@ struct fi_ops_atomic {
size_t size;
ssize_t (*write)(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t (*writev)(struct fid_ep *ep,
const struct fi_ioc *iov, void **desc, size_t count,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t (*writeto)(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t (*writemsg)(struct fid_ep *ep,
const struct fi_msg_atomic *msg, uint64_t flags);
ssize_t (*inject)(struct fid_ep *ep, const void *buf, size_t count,
uint64_t addr, uint64_t key, enum fi_datatype datatype,
enum fi_op op);
ssize_t (*injectto)(struct fid_ep *ep, const void *buf, size_t count,
fi_addr_t dest_addr, uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op);
ssize_t (*readwrite)(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
void *result, void *result_desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t (*readwritev)(struct fid_ep *ep,
const struct fi_ioc *iov, void **desc, size_t count,
struct fi_ioc *resultv, void **result_desc, size_t result_count,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t (*readwriteto)(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
void *result, void *result_desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
@ -152,18 +142,13 @@ struct fi_ops_atomic {
const void *buf, size_t count, void *desc,
const void *compare, void *compare_desc,
void *result, void *result_desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t (*compwritev)(struct fid_ep *ep,
const struct fi_ioc *iov, void **desc, size_t count,
const struct fi_ioc *comparev, void **compare_desc, size_t compare_count,
struct fi_ioc *resultv, void **result_desc, size_t result_count,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t (*compwriteto)(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
const void *compare, void *compare_desc,
void *result, void *result_desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
@ -186,32 +171,23 @@ struct fi_ops_atomic {
static inline ssize_t
fi_atomic(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context)
{
return ep->atomic->write(ep, buf, count, desc, addr, key,
return ep->atomic->write(ep, buf, count, desc, dest_addr, addr, key,
datatype, op, context);
}
static inline ssize_t
fi_atomicv(struct fid_ep *ep,
const struct fi_ioc *iov, void **desc, size_t count,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context)
{
return ep->atomic->writev(ep, iov, desc, count, addr, key, datatype,
op, context);
}
static inline ssize_t
fi_atomicto(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context)
{
return ep->atomic->writeto(ep, buf, count, desc, dest_addr,
addr, key, datatype, op, context);
return ep->atomic->writev(ep, iov, desc, count, dest_addr, addr, key,
datatype, op, context);
}
static inline ssize_t
@ -223,19 +199,10 @@ fi_atomicmsg(struct fid_ep *ep,
static inline ssize_t
fi_inject_atomic(struct fid_ep *ep, const void *buf, size_t count,
uint64_t addr, uint64_t key, enum fi_datatype datatype,
enum fi_op op)
fi_addr_t dest_addr, uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op)
{
return ep->atomic->inject(ep, buf, count, addr, key,
datatype, op);
}
static inline ssize_t
fi_inject_atomicto(struct fid_ep *ep, const void *buf, size_t count,
fi_addr_t dest_addr, uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op)
{
return ep->atomic->injectto(ep, buf, count, dest_addr, addr,
return ep->atomic->inject(ep, buf, count, dest_addr, addr,
key, datatype, op);
}
@ -243,36 +210,25 @@ static inline ssize_t
fi_fetch_atomic(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
void *result, void *result_desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context)
{
return ep->atomic->readwrite(ep, buf, count, desc, result, result_desc,
addr, key, datatype, op, context);
dest_addr, addr, key, datatype, op, context);
}
static inline ssize_t
fi_fetch_atomicv(struct fid_ep *ep,
const struct fi_ioc *iov, void **desc, size_t count,
struct fi_ioc *resultv, void **result_desc, size_t result_count,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context)
{
return ep->atomic->readwritev(ep, iov, desc, count,
resultv, result_desc, result_count,
addr, key, datatype, op, context);
}
static inline ssize_t
fi_fetch_atomicto(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
void *result, void *result_desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context)
{
return ep->atomic->readwriteto(ep, buf, count, desc, result,
result_desc, dest_addr, addr,
key, datatype, op, context);
dest_addr, addr, key, datatype, op, context);
}
static inline ssize_t
@ -290,13 +246,13 @@ fi_compare_atomic(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
const void *compare, void *compare_desc,
void *result, void *result_desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context)
{
return ep->atomic->compwrite(ep, buf, count,
desc,compare, compare_desc,
result, result_desc, addr, key,
datatype, op, context);
return ep->atomic->compwrite(ep, buf, count, desc,
compare, compare_desc, result, result_desc,
dest_addr, addr, key, datatype, op, context);
}
static inline ssize_t
@ -304,26 +260,13 @@ fi_compare_atomicv(struct fid_ep *ep,
const struct fi_ioc *iov, void **desc, size_t count,
const struct fi_ioc *comparev, void **compare_desc, size_t compare_count,
struct fi_ioc *resultv, void **result_desc, size_t result_count,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context)
{
return ep->atomic->compwritev(ep, iov, desc, count,
comparev, compare_desc, compare_count,
resultv, result_desc, result_count,
addr, key, datatype, op, context);
}
static inline ssize_t
fi_compare_atomicto(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
const void *compare, void *compare_desc,
void *result, void *result_desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context)
{
return ep->atomic->compwriteto(ep, buf, count, desc,
compare, compare_desc, result, result_desc,
dest_addr, addr, key, datatype, op, context);
}

Просмотреть файл

@ -119,17 +119,26 @@ struct fi_ops_domain {
struct fid_cq **cq, void *context);
int (*endpoint)(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **ep, void *context);
int (*scalable_ep)(struct fid_domain *domain, struct fi_info *info,
struct fid_sep **sep, void *context);
int (*cntr_open)(struct fid_domain *domain, struct fi_cntr_attr *attr,
struct fid_cntr **cntr, void *context);
int (*wait_open)(struct fid_domain *domain, struct fi_wait_attr *attr,
struct fid_wait **waitset);
int (*poll_open)(struct fid_domain *domain, struct fi_poll_attr *attr,
struct fid_poll **pollset);
int (*stx_ctx)(struct fid_domain *domain,
struct fi_tx_ctx_attr *attr, struct fid_stx **stx,
void *context);
int (*srx_ctx)(struct fid_domain *domain,
struct fi_rx_ctx_attr *attr, struct fid_ep **rx_ep,
void *context);
};
/* Memory registration flags */
#define FI_MR_OFFSET (1ULL << 0)
#define FI_MR_KEY (1ULL << 1)
struct fi_ops_mr {
size_t size;
@ -163,6 +172,12 @@ fi_domain(struct fid_fabric *fabric, struct fi_info *info,
return fabric->ops->domain(fabric, info, domain, context);
}
static inline int
fi_domain_bind(struct fid_domain *domain, struct fid *fid, uint64_t flags)
{
return domain->fid.ops->bind(&domain->fid, fid, flags);
}
static inline int
fi_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
struct fid_cq **cq, void *context)
@ -203,6 +218,12 @@ fi_av_open(struct fid_domain *domain, struct fi_av_attr *attr,
return domain->ops->av_open(domain, attr, av, context);
}
static inline int
fi_av_bind(struct fid_av *av, struct fid *fid, uint64_t flags)
{
return av->fid.ops->bind(&av->fid, fid, flags);
}
static inline int
fi_av_insert(struct fid_av *av, const void *addr, size_t count,
fi_addr_t *fi_addr, uint64_t flags, void *context)
@ -244,11 +265,6 @@ fi_rx_addr(fi_addr_t fi_addr, int rx_index, int rx_ctx_bits)
return (fi_addr_t) (((uint64_t) rx_index << (64 - rx_ctx_bits)) | fi_addr);
}
static inline int fi_av_sync(struct fid_av *av, uint64_t flags, void *context)
{
return fi_sync(&av->fid, flags, context);
}
#else // FABRIC_DIRECT
#include <rdma/fi_direct_domain.h>

Просмотреть файл

@ -71,10 +71,10 @@ struct fi_ops_ep {
void *optval, size_t *optlen);
int (*setopt)(fid_t fid, int level, int optname,
const void *optval, size_t optlen);
int (*tx_ctx)(struct fid_ep *ep, int index,
int (*tx_ctx)(struct fid_sep *sep, int index,
struct fi_tx_ctx_attr *attr, struct fid_ep **tx_ep,
void *context);
int (*rx_ctx)(struct fid_ep *ep, int index,
int (*rx_ctx)(struct fid_sep *sep, int index,
struct fi_rx_ctx_attr *attr, struct fid_ep **rx_ep,
void *context);
};
@ -82,27 +82,20 @@ struct fi_ops_ep {
struct fi_ops_msg {
size_t size;
ssize_t (*recv)(struct fid_ep *ep, void *buf, size_t len, void *desc,
void *context);
ssize_t (*recvv)(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, void *context);
ssize_t (*recvfrom)(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr, void *context);
ssize_t (*recvv)(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, fi_addr_t src_addr, void *context);
ssize_t (*recvmsg)(struct fid_ep *ep, const struct fi_msg *msg,
uint64_t flags);
ssize_t (*send)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
void *context);
ssize_t (*sendv)(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, void *context);
ssize_t (*sendto)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, void *context);
ssize_t (*sendv)(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, fi_addr_t dest_addr, void *context);
ssize_t (*sendmsg)(struct fid_ep *ep, const struct fi_msg *msg,
uint64_t flags);
ssize_t (*inject)(struct fid_ep *ep, const void *buf, size_t len);
ssize_t (*injectto)(struct fid_ep *ep, const void *buf, size_t len,
ssize_t (*inject)(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr);
ssize_t (*senddata)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, void *context);
ssize_t (*senddatato)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, fi_addr_t dest_addr, void *context);
};
@ -137,6 +130,17 @@ struct fid_pep {
struct fi_ops_cm *cm;
};
struct fid_stx {
struct fid fid;
struct fi_ops_ep ops;
};
struct fid_sep {
struct fid fid;
struct fi_ops_ep *ops;
struct fi_ops_cm *cm;
};
#ifndef FABRIC_DIRECT
static inline int
@ -153,11 +157,23 @@ fi_endpoint(struct fid_domain *domain, struct fi_info *info,
return domain->ops->endpoint(domain, info, ep, context);
}
static inline int
fi_scalable_ep(struct fid_domain *domain, struct fi_info *info,
struct fid_sep **sep, void *context)
{
return domain->ops->scalable_ep(domain, info, sep, context);
}
static inline int fi_ep_bind(struct fid_ep *ep, struct fid *bfid, uint64_t flags)
{
return ep->fid.ops->bind(&ep->fid, bfid, flags);
}
static inline int fi_scalable_ep_bind(struct fid_sep *sep, struct fid *bfid, uint64_t flags)
{
return sep->fid.ops->bind(&sep->fid, bfid, flags);
}
static inline int fi_enable(struct fid_ep *ep)
{
return ep->ops->enable(ep);
@ -186,37 +202,45 @@ fi_getopt(fid_t fid, int level, int optname,
}
static inline int
fi_tx_context(struct fid_ep *ep, int index, struct fi_tx_ctx_attr *attr,
fi_tx_context(struct fid_sep *sep, int index, struct fi_tx_ctx_attr *attr,
struct fid_ep **tx_ep, void *context)
{
return ep->ops->tx_ctx(ep, index, attr, tx_ep, context);
return sep->ops->tx_ctx(sep, index, attr, tx_ep, context);
}
static inline int
fi_rx_context(struct fid_ep *ep, int index, struct fi_rx_ctx_attr *attr,
fi_rx_context(struct fid_sep *sep, int index, struct fi_rx_ctx_attr *attr,
struct fid_ep **rx_ep, void *context)
{
return ep->ops->rx_ctx(ep, index, attr, rx_ep, context);
return sep->ops->rx_ctx(sep, index, attr, rx_ep, context);
}
static inline int
fi_stx_context(struct fid_domain *domain, struct fi_tx_ctx_attr *attr,
struct fid_stx **stx, void *context)
{
return domain->ops->stx_ctx(domain, attr, stx, context);
}
static inline int
fi_srx_context(struct fid_domain *domain, struct fi_rx_ctx_attr *attr,
struct fid_ep **rx_ep, void *context)
{
return domain->ops->srx_ctx(domain, attr, rx_ep, context);
}
static inline ssize_t
fi_recv(struct fid_ep *ep, void *buf, size_t len, void *desc, void *context)
fi_recv(struct fid_ep *ep, void *buf, size_t len, void *desc, fi_addr_t src_addr,
void *context)
{
return ep->msg->recv(ep, buf, len, desc, context);
return ep->msg->recv(ep, buf, len, desc, src_addr, context);
}
static inline ssize_t
fi_recvv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, void *context)
size_t count, fi_addr_t src_addr, void *context)
{
return ep->msg->recvv(ep, iov, desc, count, context);
}
static inline ssize_t
fi_recvfrom(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr, void *context)
{
return ep->msg->recvfrom(ep, buf, len, desc, src_addr, context);
return ep->msg->recvv(ep, iov, desc, count, src_addr, context);
}
static inline ssize_t
@ -226,23 +250,17 @@ fi_recvmsg(struct fid_ep *ep, const struct fi_msg *msg, uint64_t flags)
}
static inline ssize_t
fi_send(struct fid_ep *ep, const void *buf, size_t len, void *desc, void *context)
fi_send(struct fid_ep *ep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, void *context)
{
return ep->msg->send(ep, buf, len, desc, context);
return ep->msg->send(ep, buf, len, desc, dest_addr, context);
}
static inline ssize_t
fi_sendv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, void *context)
size_t count, fi_addr_t dest_addr, void *context)
{
return ep->msg->sendv(ep, iov, desc, count, context);
}
static inline ssize_t
fi_sendto(struct fid_ep *ep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, void *context)
{
return ep->msg->sendto(ep, buf, len, desc, dest_addr, context);
return ep->msg->sendv(ep, iov, desc, count, dest_addr, context);
}
static inline ssize_t
@ -252,29 +270,16 @@ fi_sendmsg(struct fid_ep *ep, const struct fi_msg *msg, uint64_t flags)
}
static inline ssize_t
fi_inject(struct fid_ep *ep, const void *buf, size_t len)
fi_inject(struct fid_ep *ep, const void *buf, size_t len, fi_addr_t dest_addr)
{
return ep->msg->inject(ep, buf, len);
}
static inline ssize_t
fi_injectto(struct fid_ep *ep, const void *buf, size_t len, fi_addr_t dest_addr)
{
return ep->msg->injectto(ep, buf, len, dest_addr);
return ep->msg->inject(ep, buf, len, dest_addr);
}
static inline ssize_t
fi_senddata(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, void *context)
{
return ep->msg->senddata(ep, buf, len, desc, data, context);
}
static inline ssize_t
fi_senddatato(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, fi_addr_t dest_addr, void *context)
{
return ep->msg->senddatato(ep, buf, len, desc, data, dest_addr, context);
return ep->msg->senddata(ep, buf, len, desc, data, dest_addr, context);
}
#else // FABRIC_DIRECT

Просмотреть файл

@ -147,13 +147,13 @@ struct fi_ops_eq {
ssize_t (*read)(struct fid_eq *eq, uint32_t *event,
void *buf, size_t len, uint64_t flags);
ssize_t (*readerr)(struct fid_eq *eq, struct fi_eq_err_entry *buf,
size_t len, uint64_t flags);
uint64_t flags);
ssize_t (*write)(struct fid_eq *eq, uint32_t event,
const void *buf, size_t len, uint64_t flags);
ssize_t (*sread)(struct fid_eq *eq, uint32_t *event,
void *buf, size_t len, int timeout, uint64_t flags);
const char * (*strerror)(struct fid_eq *eq, int prov_errno,
const void *err_data, void *buf, size_t len);
const void *err_data, char *buf, size_t len);
};
struct fid_eq {
@ -238,7 +238,7 @@ struct fi_ops_cq {
ssize_t (*readfrom)(struct fid_cq *cq, void *buf, size_t count,
fi_addr_t *src_addr);
ssize_t (*readerr)(struct fid_cq *cq, struct fi_cq_err_entry *buf,
size_t len, uint64_t flags);
uint64_t flags);
ssize_t (*write)(struct fid_cq *cq, const void *buf, size_t len);
ssize_t (*writeerr)(struct fid_cq *cq, struct fi_cq_err_entry *buf,
size_t len, uint64_t flags);
@ -247,7 +247,7 @@ struct fi_ops_cq {
ssize_t (*sreadfrom)(struct fid_cq *cq, void *buf, size_t count,
fi_addr_t *src_addr, const void *cond, int timeout);
const char * (*strerror)(struct fid_cq *cq, int prov_errno,
const void *err_data, void *buf, size_t len);
const void *err_data, char *buf, size_t len);
};
struct fid_cq {
@ -317,10 +317,9 @@ fi_eq_read(struct fid_eq *eq, uint32_t *event, void *buf,
}
static inline ssize_t
fi_eq_readerr(struct fid_eq *eq, struct fi_eq_err_entry *buf,
size_t len, uint64_t flags)
fi_eq_readerr(struct fid_eq *eq, struct fi_eq_err_entry *buf, uint64_t flags)
{
return eq->ops->readerr(eq, buf, len, flags);
return eq->ops->readerr(eq, buf, flags);
}
static inline ssize_t
@ -339,7 +338,7 @@ fi_eq_sread(struct fid_eq *eq, uint32_t *event, void *buf, size_t len,
static inline const char *
fi_eq_strerror(struct fid_eq *eq, int prov_errno, const void *err_data,
void *buf, size_t len)
char *buf, size_t len)
{
return eq->ops->strerror(eq, prov_errno, err_data, buf, len);
}
@ -357,10 +356,9 @@ fi_cq_readfrom(struct fid_cq *cq, void *buf, size_t count, fi_addr_t *src_addr)
}
static inline ssize_t
fi_cq_readerr(struct fid_cq *cq, struct fi_cq_err_entry *buf, size_t len,
uint64_t flags)
fi_cq_readerr(struct fid_cq *cq, struct fi_cq_err_entry *buf, uint64_t flags)
{
return cq->ops->readerr(cq, buf, len, flags);
return cq->ops->readerr(cq, buf, flags);
}
static inline ssize_t fi_cq_write(struct fid_cq *cq, const void *buf, size_t len)
@ -389,7 +387,7 @@ fi_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count,
static inline const char *
fi_cq_strerror(struct fid_cq *cq, int prov_errno, const void *err_data,
void *buf, size_t len)
char *buf, size_t len)
{
return cq->ops->strerror(cq, prov_errno, err_data, buf, len);
}

Просмотреть файл

@ -182,6 +182,7 @@ extern "C" {
#define FI_EBADFLAGS 260 /* Flags not supported */
#define FI_ENOEQ 261 /* Missing or unavailable event queue */
#define FI_EDOMAIN 262 /* Invalid resource domain */
#define FI_ENOCQ 263 /* Missing or unavailable completion queue */
const char *fi_strerror(int errnum);

Просмотреть файл

@ -57,8 +57,6 @@ struct fi_provider {
uint32_t version;
int (*getinfo)(uint32_t version, const char *node, const char *service,
uint64_t flags, struct fi_info *hints, struct fi_info **info);
int (*freeinfo)(struct fi_info *info);
struct fi_info *(*dupinfo)(const struct fi_info *info);
int (*fabric)(struct fi_fabric_attr *attr, struct fid_fabric **fabric,
void *context);
};

Просмотреть файл

@ -69,30 +69,22 @@ struct fi_msg_rma {
struct fi_ops_rma {
size_t size;
ssize_t (*read)(struct fid_ep *ep, void *buf, size_t len, void *desc,
uint64_t addr, uint64_t key, void *context);
fi_addr_t src_addr, uint64_t addr, uint64_t key, void *context);
ssize_t (*readv)(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, uint64_t addr, uint64_t key, void *context);
ssize_t (*readfrom)(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr, uint64_t addr, uint64_t key,
size_t count, fi_addr_t src_addr, uint64_t addr, uint64_t key,
void *context);
ssize_t (*readmsg)(struct fid_ep *ep, const struct fi_msg_rma *msg,
uint64_t flags);
ssize_t (*write)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t addr, uint64_t key, void *context);
fi_addr_t dest_addr, uint64_t addr, uint64_t key, void *context);
ssize_t (*writev)(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, uint64_t addr, uint64_t key, void *context);
ssize_t (*writeto)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, uint64_t addr, uint64_t key,
size_t count, fi_addr_t dest_addr, uint64_t addr, uint64_t key,
void *context);
ssize_t (*writemsg)(struct fid_ep *ep, const struct fi_msg_rma *msg,
uint64_t flags);
ssize_t (*inject)(struct fid_ep *ep, const void *buf, size_t len,
uint64_t addr, uint64_t key);
ssize_t (*injectto)(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr, uint64_t addr, uint64_t key);
ssize_t (*writedata)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, uint64_t addr, uint64_t key, void *context);
ssize_t (*writedatato)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, fi_addr_t dest_addr, uint64_t addr, uint64_t key,
void *context);
};
@ -102,23 +94,17 @@ struct fi_ops_rma {
static inline ssize_t
fi_read(struct fid_ep *ep, void *buf, size_t len, void *desc,
uint64_t addr, uint64_t key, void *context)
fi_addr_t src_addr, uint64_t addr, uint64_t key, void *context)
{
return ep->rma->read(ep, buf, len, desc, addr, key, context);
return ep->rma->read(ep, buf, len, desc, src_addr, addr, key, context);
}
static inline ssize_t
fi_readv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, uint64_t addr, uint64_t key, void *context)
size_t count, fi_addr_t src_addr, uint64_t addr, uint64_t key,
void *context)
{
return ep->rma->readv(ep, iov, desc, count, addr, key, context);
}
static inline ssize_t
fi_readfrom(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr, uint64_t addr, uint64_t key, void *context)
{
return ep->rma->readfrom(ep, buf, len, desc, src_addr, addr, key, context);
return ep->rma->readv(ep, iov, desc, count, src_addr, addr, key, context);
}
static inline ssize_t
@ -129,23 +115,17 @@ fi_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg, uint64_t flags)
static inline ssize_t
fi_write(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t addr, uint64_t key, void *context)
fi_addr_t dest_addr, uint64_t addr, uint64_t key, void *context)
{
return ep->rma->write(ep, buf, len, desc, addr, key, context);
return ep->rma->write(ep, buf, len, desc, dest_addr, addr, key, context);
}
static inline ssize_t
fi_writev(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, uint64_t addr, uint64_t key, void *context)
size_t count, fi_addr_t dest_addr, uint64_t addr, uint64_t key,
void *context)
{
return ep->rma->writev(ep, iov, desc, count, addr, key, context);
}
static inline ssize_t
fi_writeto(struct fid_ep *ep, const void *buf, size_t len, void *desc,
fi_addr_t dst_addr, uint64_t addr, uint64_t key, void *context)
{
return ep->rma->writeto(ep, buf, len, desc, dst_addr, addr, key, context);
return ep->rma->writev(ep, iov, desc, count, dest_addr, addr, key, context);
}
static inline ssize_t
@ -156,32 +136,18 @@ fi_writemsg(struct fid_ep *ep, const struct fi_msg_rma *msg, uint64_t flags)
static inline ssize_t
fi_inject_write(struct fid_ep *ep, const void *buf, size_t len,
uint64_t addr, uint64_t key)
fi_addr_t dest_addr, uint64_t addr, uint64_t key)
{
return ep->rma->inject(ep, buf, len, addr, key);
}
static inline ssize_t
fi_inject_writeto(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr, uint64_t addr, uint64_t key)
{
return ep->rma->injectto(ep, buf, len, dest_addr, addr, key);
return ep->rma->inject(ep, buf, len, dest_addr, addr, key);
}
static inline ssize_t
fi_writedata(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, uint64_t addr, uint64_t key, void *context)
{
return ep->rma->writedata(ep, buf, len, desc, data, addr, key, context);
}
static inline ssize_t
fi_writedatato(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, fi_addr_t dest_addr, uint64_t addr, uint64_t key,
void *context)
{
return ep->rma->writedatato(ep, buf, len, desc,data, dest_addr,
addr, key, context);
return ep->rma->writedata(ep, buf, len, desc,data, dest_addr,
addr, key, context);
}
#else // FABRIC_DIRECT

Просмотреть файл

@ -58,29 +58,22 @@ struct fi_msg_tagged {
struct fi_ops_tagged {
size_t size;
ssize_t (*recv)(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr,
uint64_t tag, uint64_t ignore, void *context);
ssize_t (*recvv)(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, uint64_t tag, uint64_t ignore, void *context);
ssize_t (*recvfrom)(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr,
size_t count, fi_addr_t src_addr,
uint64_t tag, uint64_t ignore, void *context);
ssize_t (*recvmsg)(struct fid_ep *ep, const struct fi_msg_tagged *msg,
uint64_t flags);
ssize_t (*send)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t tag, void *context);
ssize_t (*sendv)(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, uint64_t tag, void *context);
ssize_t (*sendto)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, uint64_t tag, void *context);
ssize_t (*sendv)(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, fi_addr_t dest_addr, uint64_t tag, void *context);
ssize_t (*sendmsg)(struct fid_ep *ep, const struct fi_msg_tagged *msg,
uint64_t flags);
ssize_t (*inject)(struct fid_ep *ep, const void *buf, size_t len,
uint64_t tag);
ssize_t (*injectto)(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr, uint64_t tag);
ssize_t (*senddata)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, uint64_t tag, void *context);
ssize_t (*senddatato)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, fi_addr_t dest_addr, uint64_t tag, void *context);
ssize_t (*search)(struct fid_ep *ep, uint64_t *tag, uint64_t ignore,
uint64_t flags, fi_addr_t *src_addr, size_t *len, void *context);
@ -91,24 +84,19 @@ struct fi_ops_tagged {
static inline ssize_t
fi_trecv(struct fid_ep *ep, void *buf, size_t len, void *desc,
uint64_t tag, uint64_t ignore, void *context)
fi_addr_t src_addr, uint64_t tag, uint64_t ignore, void *context)
{
return ep->tagged->recv(ep, buf, len, desc, tag, ignore, context);
return ep->tagged->recv(ep, buf, len, desc, src_addr, tag, ignore,
context);
}
static inline ssize_t
fi_trecvv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, uint64_t tag, uint64_t ignore, void *context)
size_t count, fi_addr_t src_addr, uint64_t tag, uint64_t ignore,
void *context)
{
return ep->tagged->recvv(ep, iov, desc, count, tag, ignore, context);
}
static inline ssize_t
fi_trecvfrom(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr, uint64_t tag, uint64_t ignore, void *context)
{
return ep->tagged->recvfrom(ep, buf, len, desc, src_addr, tag, ignore,
context);
return ep->tagged->recvv(ep, iov, desc, count, src_addr, tag, ignore,
context);
}
static inline ssize_t
@ -119,23 +107,16 @@ fi_trecvmsg(struct fid_ep *ep, const struct fi_msg_tagged *msg, uint64_t flags)
static inline ssize_t
fi_tsend(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t tag, void *context)
fi_addr_t dest_addr, uint64_t tag, void *context)
{
return ep->tagged->send(ep, buf, len, desc, tag, context);
return ep->tagged->send(ep, buf, len, desc, dest_addr, tag, context);
}
static inline ssize_t
fi_tsendv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, uint64_t tag, void *context)
size_t count, fi_addr_t dest_addr, uint64_t tag, void *context)
{
return ep->tagged->sendv(ep, iov, desc, count, tag, context);
}
static inline ssize_t
fi_tsendto(struct fid_ep *ep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, uint64_t tag, void *context)
{
return ep->tagged->sendto(ep, buf, len, desc, dest_addr, tag, context);
return ep->tagged->sendv(ep, iov, desc, count, dest_addr,tag, context);
}
static inline ssize_t
@ -145,31 +126,18 @@ fi_tsendmsg(struct fid_ep *ep, const struct fi_msg_tagged *msg, uint64_t flags)
}
static inline ssize_t
fi_tinject(struct fid_ep *ep, const void *buf, size_t len, uint64_t tag)
fi_tinject(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr, uint64_t tag)
{
return ep->tagged->inject(ep, buf, len, tag);
}
static inline ssize_t
fi_tinjectto(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr, uint64_t tag)
{
return ep->tagged->injectto(ep, buf, len, dest_addr, tag);
return ep->tagged->inject(ep, buf, len, dest_addr, tag);
}
static inline ssize_t
fi_tsenddata(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, uint64_t tag, void *context)
uint64_t data, fi_addr_t dest_addr, uint64_t tag, void *context)
{
return ep->tagged->senddata(ep, buf, len, desc, data, tag, context);
}
static inline ssize_t
fi_tsenddatato(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, fi_addr_t dest_addr, uint64_t tag, void *context)
{
return ep->tagged->senddatato(ep, buf, len, desc, data,
dest_addr, tag, context);
return ep->tagged->senddata(ep, buf, len, desc, data,
dest_addr, tag, context);
}
static inline ssize_t

Просмотреть файл

@ -1,189 +1,237 @@
.TH "FI_DIRECT" 7 "2014-11-11" "Libfabric v0.0.2" "Libfabric Programmer's Manual" libfabric
.TH fabric 7 "2014\-12\-02" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
Fabric Interface Library
.SH SYNOPSIS
.B #include <rdma/fabric.h>
.IP
.nf
\f[C]
#include\ <rdma/fabric.h>
\f[]
.fi
.PP
Libfabric is a high-performance fabric software library designed to
provide low-latency interfaces to fabric hardware.
.SH OVERVIEW
Libfabric provides 'process direct I/O' to application software communicating
across fabric software and hardware. Process direct I/O, historically
referred to as RDMA, allows an application to directly access network
resources without operating system interventions. Data transfers can
occur directly to and from application memory.
.PP
Libfabric provides \[aq]process direct I/O\[aq] to application software
communicating across fabric software and hardware.
Process direct I/O, historically referred to as RDMA, allows an
application to directly access network resources without operating
system interventions.
Data transfers can occur directly to and from application memory.
.PP
There are two components to the libfabric software:
.IP "Fabric Providers" 12
Conceptually, a fabric provider may be viewed as a local hardware
NIC driver, though a provider is not limited by this definition. The first
component of libfabric is a general purpose framework that is capable of
handling different types of fabric hardware. All fabric hardware
devices and their software drivers are required to support this framework.
Devices and the drivers that plug into the libfabric framework are
referred to as fabric providers, or simply providers. Provider details
may be found in fi_prov.
.PP
.IP "Fabric Interfaces" 12
The second component is a set of communication operations. Libfabric
defines several sets of communication functions that providers can support.
\f[I]Fabric Providers\f[] : Conceptually, a fabric provider may be
viewed as a local hardware NIC driver, though a provider is not limited
by this definition.
The first component of libfabric is a general purpose framework that is
capable of handling different types of fabric hardware.
All fabric hardware devices and their software drivers are required to
support this framework.
Devices and the drivers that plug into the libfabric framework are
referred to as fabric providers, or simply providers.
Provider details may be found in fi_prov.
.PP
\f[I]Fabric Interfaces\f[] : The second component is a set of
communication operations.
Libfabric defines several sets of communication functions that providers
can support.
It is not required that providers implement all the interfaces that are
defined; however, providers clearly indicate which interfaces they do
support.
.sp
The fabric interfaces are designed such that they are
cohesive and not simply a union of disjoint interfaces. The interfaces are
logically divided into two groups: control interfaces and communication
operations. The control interfaces are a common set of operations that
provide access to local communication resources, such as address vectors
and event queues. The communication operations expose particular models
of communication and fabric functionality, such as message queues, remote
memory access, and atomic operations. Communication operations are
associated with fabric endpoints.
.sp
.PP
The fabric interfaces are designed such that they are cohesive and not
simply a union of disjoint interfaces.
The interfaces are logically divided into two groups: control interfaces
and communication operations.
The control interfaces are a common set of operations that provide
access to local communication resources, such as address vectors and
event queues.
The communication operations expose particular models of communication
and fabric functionality, such as message queues, remote memory access,
and atomic operations.
Communication operations are associated with fabric endpoints.
.PP
Applications will typically use the control interfaces to discover local
capabilities and allocate necessary resources. They will then allocate and
configure a communication endpoint to send and receive data, or perform
other types of data transfers, with remote endpoints.
.SH "CONTROL INTERFACES"
The control interfaces APIs provide applications access to network resources.
This involves listing all the interfaces available, obtaining the capabilities
of the interfaces and opening a provider.
.IP "fi_getinfo - Fabric Information" 12
The fi_getinfo call is the base call used to discover and request fabric
services offered by the system. Applications can use this call to
indicate the type of communication that they desire. The results from
fi_getinfo, fi_info, are used to reserve and configure fabric resources.
.sp
fi_getinfo returns a list of fi_info structures. Each structure references
a single fabric provider, indicating the interfaces that the provider
supports, along with a named set of resources. A fabric provider may
include multiple fi_info structures in the returned list.
.IP "fi_fabric - Fabric Domain" 12
A fabric domain represents a collection of hardware and software resources
that access a single physical or virtual network. All network ports on a
system that can communicate with each other through the fabric belong to
the same fabric domain. A fabric domain shares network addresses and
can span multiple providers. libfabric supports systems connected to
multiple fabrics.
.IP "fi_domain - Access Domains" 12
An access domain represents a single logical connection into a fabric.
capabilities and allocate necessary resources.
They will then allocate and configure a communication endpoint to send
and receive data, or perform other types of data transfers, with remote
endpoints.
.SH CONTROL INTERFACES
.PP
The control interfaces APIs provide applications access to network
resources.
This involves listing all the interfaces available, obtaining the
capabilities of the interfaces and opening a provider.
.PP
\f[I]fi_getinfo - Fabric Information\f[] : The fi_getinfo call is the
base call used to discover and request fabric services offered by the
system.
Applications can use this call to indicate the type of communication
that they desire.
The results from fi_getinfo, fi_info, are used to reserve and configure
fabric resources.
.PP
fi_getinfo returns a list of fi_info structures.
Each structure references a single fabric provider, indicating the
interfaces that the provider supports, along with a named set of
resources.
A fabric provider may include multiple fi_info structures in the
returned list.
.PP
\f[I]fi_fabric - Fabric Domain\f[] : A fabric domain represents a
collection of hardware and software resources that access a single
physical or virtual network.
All network ports on a system that can communicate with each other
through the fabric belong to the same fabric domain.
A fabric domain shares network addresses and can span multiple
providers.
libfabric supports systems connected to multiple fabrics.
.PP
\f[I]fi_domain - Access Domains\f[] : An access domain represents a
single logical connection into a fabric.
It may map to a single physical or virtual NIC or a port.
An access domain defines the boundary across which fabric resources
may be associated. Each access domain belongs to a single fabric domain.
.IP "fi_endpoint - Fabric Endpoint" 12
A fabric endpoint is a communication portal. An endpoint may be either
active or passive. Passive endpoints are used to listen for connection
requests. Active endpoints can perform data transfers. Endpoints are
configured with specific communication capabilities and data transfer
interfaces.
.IP "fi_eq - Event Queue" 12
Event queues, are used to collect
and report the completion of asynchronous operations. For example, the
completion of a data transfer operation submitted over a fabric endpoint
may write an event to an event queue associated with the endpoint.
An access domain defines the boundary across which fabric resources may
be associated.
Each access domain belongs to a single fabric domain.
.PP
\f[I]fi_endpoint - Fabric Endpoint\f[] : A fabric endpoint is a
communication portal.
An endpoint may be either active or passive.
Passive endpoints are used to listen for connection requests.
Active endpoints can perform data transfers.
Endpoints are configured with specific communication capabilities and
data transfer interfaces.
.PP
\f[I]fi_eq - Event Queue\f[] : Event queues, are used to collect and
report the completion of asynchronous operations.
For example, the completion of a data transfer operation submitted over
a fabric endpoint may write an event to an event queue associated with
the endpoint.
There are multiple types of event queues, and the format of the events
that they report are controlled by applications.
.IP "fi_cntr - Event Counters" 12
Event counters are used to report the number of completed asynchronous
operations. Event counters are considered light-weight, in that a
completion simply increments a counter, rather than placing an entry into
an event queue.
.IP "fi_mr - Memory Region" 12
Memory regions describe application local memory buffers. In order for
fabric resources to access application memory, the application must first
grant permission to the fabric provider by constructing a memory region.
Memory regions are required for specific types of data transfer operations,
such as RMA transfers (see below).
.IP "fi_av - Address Vector" 12
Address vectors are used to map higher level addresses, such as IP
addresses, which may be more natural for an application to use, into
fabric specific addresses. The use of address vectors allows providers
to reduce the amount of memory required to maintain large address
look-up tables, and eliminate expensive address resolution and look-up
methods during data transfer operations.
.SH "DATA TRANSFER INTERFACES"
.PP
\f[I]fi_cntr - Event Counters\f[] : Event counters are used to report
the number of completed asynchronous operations.
Event counters are considered light-weight, in that a completion simply
increments a counter, rather than placing an entry into an event queue.
.PP
\f[I]fi_mr - Memory Region\f[] : Memory regions describe application
local memory buffers.
In order for fabric resources to access application memory, the
application must first grant permission to the fabric provider by
constructing a memory region.
Memory regions are required for specific types of data transfer
operations, such as RMA transfers (see below).
.PP
\f[I]fi_av - Address Vector\f[] : Address vectors are used to map higher
level addresses, such as IP addresses, which may be more natural for an
application to use, into fabric specific addresses.
The use of address vectors allows providers to reduce the amount of
memory required to maintain large address look-up tables, and eliminate
expensive address resolution and look-up methods during data transfer
operations.
.SH DATA TRANSFER INTERFACES
.PP
Fabric endpoints are associated with multiple data transfer interfaces.
Each interface set is designed to support a specific style of communication,
with an endpoint allowing the different interfaces to be used in conjunction.
Each interface set is designed to support a specific style of
communication, with an endpoint allowing the different interfaces to be
used in conjunction.
The following data transfer interfaces are defined by libfabric.
.IP "fi_msg - Message Queue" 12
Message queues expose a simple, message-based FIFO queue interface to
the application. Message data transfers allow applications to send and
receive data with message boundaries being maintained.
.IP "fi_tagged - Tagged Message Queues" 12
Tagged message lists expose send/receive data transfer operations
built on the concept of tagged messaging. The tagged message queue is
conceptually similar to standard message queues, but with the addition
of 64-bit tags for each message. Sent messages are matched with receive
buffers that are tagged with a similar value.
.IP "fi_rma - Remote Memory Access" 12
RMA transfers are one-sided operations that read or write data directly
to a remote memory region. Other than defining the appropriate memory
region, RMA operations do not require interaction at the target side for
the data transfer to complete.
.IP "fi_atomic - Atomic" 12
Atomic operations can perform one of several operations on a remote
memory region. Atomic operations include well-known functionality, such
as atomic-add and compare-and-swap, plus several other pre-defined calls.
Unlike other data transfer interfaces, atomic operations are aware of the
data formatting at the target memory region.
.SH "PROVIDER REQUIREMENTS"
.PP
\f[I]fi_msg - Message Queue\f[] : Message queues expose a simple,
message-based FIFO queue interface to the application.
Message data transfers allow applications to send and receive data with
message boundaries being maintained.
.PP
\f[I]fi_tagged - Tagged Message Queues\f[] : Tagged message lists expose
send/receive data transfer operations built on the concept of tagged
messaging.
The tagged message queue is conceptually similar to standard message
queues, but with the addition of 64-bit tags for each message.
Sent messages are matched with receive buffers that are tagged with a
similar value.
.PP
\f[I]fi_rma - Remote Memory Access\f[] : RMA transfers are one-sided
operations that read or write data directly to a remote memory region.
Other than defining the appropriate memory region, RMA operations do not
require interaction at the target side for the data transfer to
complete.
.PP
\f[I]fi_atomic - Atomic\f[] : Atomic operations can perform one of
several operations on a remote memory region.
Atomic operations include well-known functionality, such as atomic-add
and compare-and-swap, plus several other pre-defined calls.
Unlike other data transfer interfaces, atomic operations are aware of
the data formatting at the target memory region.
.SH PROVIDER REQUIREMENTS
.PP
Libfabric provides a general framework for supporting multiple types of
fabric objects and their related interfaces. Fabric providers have a large
amount of flexibility in selecting which components they are able and
willing to support, based on specific hardware constraints. To assist in
the development of applications, libfabric specifies the following
requirements that must be met by any fabric provider, if requested by an
application. (Note that the instantiation of a specific fabric object is
subject to application configuration parameters and need not meet these
fabric objects and their related interfaces.
Fabric providers have a large amount of flexibility in selecting which
components they are able and willing to support, based on specific
hardware constraints.
To assist in the development of applications, libfabric specifies the
following requirements that must be met by any fabric provider, if
requested by an application.
(Note that the instantiation of a specific fabric object is subject to
application configuration parameters and need not meet these
requirements).
.IP \(bu
.IP \[bu] 2
A fabric provider must support at least one endpoint type.
.IP \(bu
.IP \[bu] 2
All endpoints must support the message queue data transfer interface.
.IP \(bu
.IP \[bu] 2
An endpoint that advertises support for a specific endpoint capability
must support the corresponding data transfer interface.
.IP \(bu
.IP \[bu] 2
Endpoints must support operations to send and receive data for any data
transfer operations that they support.
.IP \(bu
Connectionless endpoints must support all relevant 'to/from' data transfer
routines. (sendto / recvfrom / writeto / readfrom / etc.)
.IP \(bu
Connectionless endpoints must support the CM interfaces for getname, getpeer,
and connect.
.IP \(bu
Connectionless endpoints that support multicast operations must support the
CM interfaces join and leave.
.IP \(bu
Connection-oriented interfaces must support the CM interfaces getname, getpeer,
connect, listen, accept, reject, and shutdown.
.IP \(bu
All endpoints must support all relevant 'msg' data transfer routines.
transfer operations that they support.
.IP \[bu] 2
Connectionless endpoints must support all relevant data transfer
routines.
(send / recv / write / read / etc.)
.IP \[bu] 2
Connectionless endpoints must support the CM interfaces for getname,
getpeer, and connect.
.IP \[bu] 2
Connectionless endpoints that support multicast operations must support
the CM interfaces join and leave.
.IP \[bu] 2
Connection-oriented interfaces must support the CM interfaces getname,
getpeer, connect, listen, accept, reject, and shutdown.
.IP \[bu] 2
All endpoints must support all relevant \[aq]msg\[aq] data transfer
routines.
(sendmsg / recvmsg / writemsg / readmsg / etc.)
.IP \(bu
.IP \[bu] 2
Access domains must support opening address vector maps and tables.
.IP \(bu
.IP \[bu] 2
Address vectors associated with domains that may be identified using IP
addresses must support FI_SOCKADDR_IN and FI_SOCKADDR_IN6 input formats.
.IP \(bu
Address vectors must support FI_ADDR, FI_ADDR_INDEX, and FI_AV output formats.
.IP \(bu
.IP \[bu] 2
Address vectors must support FI_ADDR, FI_ADDR_INDEX, and FI_AV output
formats.
.IP \[bu] 2
Access domains must support opening event queues and counters.
.IP \(bu
.IP \[bu] 2
Event queues must support the FI_EQ_FORMAT_CONTEXT format.
.IP \(bu
.IP \[bu] 2
Event queues associated with data transfer completions must support the
FI_EQ_FORMAT_DATA format.
.IP \(bu
.IP \[bu] 2
Event queues associated with tagged message transfers must support the
FI_EQ_FORMAT_TAGGED format.
.IP \(bu
.IP \[bu] 2
A provider is expected to be forward compatible, and must be able to be
compiled against expanded fi_xxx_ops structures that define new functions
added after the provider was written. Any unknown functions must be set
to NULL.
.SH "SEE ALSO"
fi_getinfo(3), fi_endpoint(3), fi_domain(3), fi_av(3), fi_eq(3), fi_mr(3)
compiled against expanded \f[C]fi_xxx_ops\f[] structures that define new
functions added after the provider was written.
Any unknown functions must be set to NULL.
.SH SEE ALSO
.PP
\f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3),
\f[C]fi_av\f[](3), \f[C]fi_eq\f[](3), \f[C]fi_mr\f[](3)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -1,467 +0,0 @@
.TH "FI_ATOMIC" 3 "2014-11-11" "Libfabric v0.0.2" "Libfabric Programmer's Manual" libfabric
.SH NAME
fi_atomic - Remote atomic functions
.PP
fi_atomic / fi_atomicv
.br
fi_atomicto / fi_atomicmsg
.RS
Initiates an atomic operation to remote memory
.RE
.P
fi_fetch_atomic / fi_fetch_atomicv
.br
fi_fetch_atomicto / fi_fetch_atomicmsg
.RS
Initiates an atomic operation to remote memory, retrieving the initial
value.
.RE
.P
fi_compare_atomic / fi_compare_atomicv
.br
fi_compare_atomicto / fi_compare_atomicmsg
.RS
Initiates an atomic compare-operation to remote memory, retrieving the initial
value.
.RE
.P
fi_atomic_valid / fi_fetch_atomic_valid / fi_compare_atomic_valid
.RS
Indicates if a provider supports a specific atomic operation
.RE
.SH SYNOPSIS
.B #include <rdma/fi_atomic.h>
.HP
.BI "ssize_t fi_atomic(struct fid_ep *" ep ","
.BI "const void * " buf ", size_t " count ", void *" desc ","
.BI "uint64_t " addr ", uint64_t " key ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", void * " context ");"
.HP
.BI "ssize_t fi_atomicv(struct fid_ep *" ep ","
.BI "const struct fi_ioc * " iov ", void **" desc ", size_t " count ","
.BI "uint64_t " addr ", uint64_t " key ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", void * " context ");"
.HP
.BI "ssize_t fi_atomicto(struct fid_ep *" ep ","
.BI "const void * " buf ", size_t " count ", void *" desc ","
.BI "fi_addr_t " dest_addr ","
.BI "uint64_t " addr ", uint64_t " key ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", void * " context ");"
.HP
.BI "ssize_t fi_atomicmsg(struct fid_ep *" ep ","
.BI "const struct fi_msg_atomic * " msg ","
.BI "uint64_t " flags ");"
.HP
.BI "ssize_t fi_inject_atomic(struct fid_ep *" ep ","
.BI "const void *" buf ","
.BI "size_t " count ","
.BI "uint64_t " addr ","
.BI "uint64_t " key ","
.BI "enum fi_datatype " datatype ","
.BI "enum fi_op " op ");"
.HP
.BI "ssize_t fi_inject_atomicto(struct fid_ep *" ep ","
.BI "const void *" buf ","
.BI "size_t " count ","
.BI "fi_addr_t " dest_addr ","
.BI "uint64_t " addr ","
.BI "uint64_t " key ","
.BI "enum fi_datatype " datatype ","
.BI "enum fi_op " op ");"
.PP
.HP
.BI "ssize_t fi_fetch_atomic(struct fid_ep *" ep ","
.BI "const void * " buf ", size_t " count ", void *" desc ","
.BI "void * " result ", void *" result_desc ","
.BI "uint64_t " addr ", uint64_t " key ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", void * " context ");"
.HP
.BI "ssize_t fi_fetch_atomicv(struct fid_ep *" ep ","
.BI "const struct fi_ioc * " iov ", void **" desc ", size_t " count ","
.BI "struct fi_ioc * " resultv ", void **" result_desc ", size_t " result_count ","
.BI "uint64_t " addr ", uint64_t " key ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", void * " context ");"
.HP
.BI "ssize_t fi_fetch_atomicto(struct fid_ep *" ep ","
.BI "const void * " buf ", size_t " count ", void *" desc ","
.BI "void * " result ", void *" result_desc ","
.BI "fi_addr_t " dest_addr ","
.BI "uint64_t " addr ", uint64_t " key ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", void * " context ");"
.HP
.BI "ssize_t fi_fetch_atomicmsg(struct fid_ep *" ep ","
.BI "const struct fi_msg_atomic * " msg ","
.BI "struct fi_ioc * " resultv ", void **" result_desc ", size_t " result_count ","
.BI "uint64_t " flags ");"
.PP
.HP
.BI "ssize_t fi_compare_atomic(struct fid_ep *" ep ","
.BI "const void * " buf ", size_t " count ", void *" desc ","
.BI "const void * " compare ", void *" compare_desc ","
.BI "void * " result ", void *" result_desc ","
.BI "uint64_t " addr ", uint64_t " key ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", void * " context ");"
.HP
.BI "ssize_t fi_compare_atomicv(struct fid_ep *" ep ","
.BI "const struct fi_ioc * " iov ", void **" desc ", size_t " count ","
.BI "const struct fi_ioc * " comparev ", void **" compare_desc ", size_t " compare_count ","
.BI "struct fi_ioc * " resultv ", void **" result_desc ", size_t " result_count ","
.BI "uint64_t " addr ", uint64_t " key ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", void * " context ");"
.HP
.BI "ssize_t fi_compare_atomicto(struct fid_ep *" ep ","
.BI "const void * " buf ", size_t " count ", void *" desc ","
.BI "const void * " compare ", void *" compare_desc ","
.BI "void * " result ", void *" result_desc ","
.BI "fi_addr_t " dest_addr ","
.BI "uint64_t " addr ", uint64_t " key ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", void * " context ");"
.HP
.BI "ssize_t fi_compare_atomicmsg(struct fid_ep *" ep ","
.BI "const struct fi_msg_atomic * " msg ","
.BI "const struct fi_ioc * " comparev ", void **" compare_desc ", size_t " compare_count ","
.BI "struct fi_ioc * " resultv ", void **" result_desc ", size_t " result_count ","
.BI "uint64_t " flags ");"
.PP
.HP
.BI "int fi_atomicvalid(struct fid_ep *" ep ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", size_t " count ");"
.HP
.BI "int fi_fetch_atomicvalid(struct fid_ep *" ep ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", size_t " count ");"
.HP
.BI "int fi_compare_atomicvalid(struct fid_ep *" ep ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", size_t " count ");"
.SH ARGUMENTS
.IP "ep"
Fabric endpoint on which to initiate atomic operation.
.IP "buf"
Local data buffer that specifies first operand of atomic operation
.IP "iov / comparev / resultv"
Vectored data buffer(s).
.IP "count / compare_count / result_count"
Count of vectored data entries.
.IP "addr"
Address of remote memory to access.
.IP "key"
Protection key associated with the remote memory.
.IP "datatype"
Datatype associated with atomic operands
.IP "op"
Atomic operation to perform
.IP "compare"
Local compare buffer, containing comparison data.
.IP "result"
Local data buffer to store initial value of remote buffer
.IP "desc / compare_desc / result_desc"
Data descriptor associated with the local data buffer, local compare buffer,
and local result buffer, respectively.
.IP "dest_addr"
Destination address for connectionless atomic operations
.IP "msg"
Message descriptor for atomic operations
.IP "flags"
Additional flags to apply for the atomic operation
.IP "context"
User specified pointer to associate with the operation.
.SH "DESCRIPTION"
Atomic transfers are used to read and update data located in remote memory
regions in an atomic fashion. Conceptually, they are similar to local
atomic operations of a similar nature (e.g. atomic increment, compare and
swap, etc.). Updates to remote data involve one of several operations on
the data, and act on specific types of data, as listed below. As such,
atomic transfers have knowledge of the format of the data being
accessed. A single atomic function may operate across an array of data
applying an atomic operation to each entry, but the atomicity of an
operation is limited to a single datatype or entry.
.SS "Atomic Data Types"
Atomic functions may operate on one of the following identified data types.
A given atomic function may support any datatype, subject to provider
implementation constraints.
.IP "FI_INT8"
Signed 8-bit integer.
.IP "FI_UINT8"
Unsigned 8-bit integer.
.IP "FI_INT16"
Signed 16-bit integer.
.IP "FI_UINT16"
Unsigned 16-bit integer.
.IP "FI_INT32"
Signed 32-bit integer.
.IP "FI_UINT32"
Unsigned 32-bit integer.
.IP "FI_INT64"
Signed 64-bit integer.
.IP "FI_UINT64"
Unsigned 64-bit integer.
.IP "FI_FLOAT"
A single-precision floating point value (IEEE 754).
.IP "FI_DOUBLE"
A double-precision floating point value (IEEE 754).
.IP "FI_FLOAT_COMPLEX"
An ordered pair of single-precision floating point values (IEEE 754),
with the first value representing the real portion of a complex
number and the second representing the imaginary portion.
.IP "FI_DOUBLE_COMPLEX"
An ordered pair of double-precision floating point values (IEEE 754),
with the first value representing the real portion of a complex
number and the second representing the imaginary portion.
.IP "FI_LONG_DOUBLE"
A double-extended precision floating point value (IEEE 754).
.IP "FI_LONG_DOUBLE_COMPLEX"
An ordered pair of double-extended precision floating point values (IEEE 754),
with the first value representing the real portion of a complex
number and the second representing the imaginary portion.
.SS "Atomic Operations"
The following atomic operations are defined. An atomic operation often acts
against a target value in the remote memory buffer and a source value provided
with the atomic function. It may also carry source data to replace the
target value in compare and swap operations. A conceptual description of
each operation is provided.
.IP "FI_MIN"
Minimum
.nf
if (buf[i] < addr[i])
addr[i] = buf[i]
.fi
.IP "FI_MAX"
Maximum
.nf
if (buf[i] > addr[i])
addr[i] = buf[i]
.fi
.IP "FI_SUM"
Sum
.nf
addr[i] = addr[i] + buf[i]
.fi
.IP "FI_PROD"
Product
.nf
addr[i] = addr[i] * buf[i]
.fi
.IP "FI_LOR"
Logical OR
.nf
addr[i] = (addr[i] || buf[i])
.fi
.IP "FI_LAND"
Logical AND
.nf
addr[i] = (addr[i] && buf[i])
.fi
.IP "FI_BOR"
Bitwise OR
.nf
addr[i] = addr[i] | buf[i]
.fi
.IP "FI_BAND"
Bitwise AND
.nf
addr[i] = addr[i] & buf[i]
.fi
.IP "FI_LXOR"
Logical exclusive-OR (XOR)
.nf
addr[i] = ((addr[i] && !buf[i]) || (!addr[i] && buf[i]))
.fi
.IP "FI_BXOR"
Bitwise exclusive-OR (XOR)
.nf
addr[i] = addr[i] ^ buf[i]
.fi
.IP "FI_ATOMIC_READ"
Read data atomically
.nf
buf[i] = addr[i]
.fi
.IP "FI_ATOMIC_WRITE"
Write data atomically
.nf
addr[i] = buf[i]
.fi
.IP "FI_CSWAP"
Compare values and if equal swap with data
.nf
if (addr[i] == compare[i])
addr[i] = buf[i]
.fi
.IP "FI_CSWAP_NE"
Compare values and if not equal swap with data
.nf
if (addr[i] != compare[i])
addr[i] = buf[i]
.fi
.IP "FI_CSWAP_LE"
Compare values and if less than or equal swap with data
.nf
if (addr[i] <= compare[i])
addr[i] = buf[i]
.fi
.IP "FI_CSWAP_LT"
Compare values and if less than swap with data
.nf
if (addr[i] < compare[i])
addr[i] = buf[i]
.fi
.IP "FI_CSWAP_GE"
Compare values and if greater than or equal swap with data
.nf
if (addr[i] >= compare[i])
addr[i] = buf[i]
.fi
.IP "FI_CSWAP_GT"
Compare values and if greater than swap with data
.nf
if (addr[i] > compare[i])
addr[i] = buf[i]
.fi
.IP "FI_MSWAP"
Swap masked bits with data
.nf
addr[i] = (buf[i] & compare[i]) | (addr[i] & ~compare[i])
.fi
.SS "Base Atomic Functions"
The base atomic functions -- fi_atomic, fi_atomicv, fi_atomicto,
fi_atomicmsg -- are used to transmit data to a remote node,
where the specified atomic operation is performed against the target data.
The result of a base atomic function is stored at the remote memory region.
The main difference between atomic functions is the number and type of
parameters that they accept as input. Otherwise, they perform the same
general function.
.PP
The call fi_atomic transfers the data contained in the user-specified data
buffer to a remote node. The local endpoint must be connected to
a remote endpoint or destination before fi_atomic is called. Unless the
endpoint has been configured differently, the data buffer passed into
fi_atomic must not be touched by the application until the fi_atomic
call completes asynchronously. The target buffer of a base atomic
operation must allow for remote read and/or write access, as appropriate.
.PP
The fi_atomicv call adds support for a scatter-gather list to fi_atomic.
The fi_atomicv transfers the set of data buffers referenced by
the ioc parameter to the remote node for processing.
.PP
The fi_atomicto function is equivalent to fi_atomic for unconnected endpoints.
.PP
The fi_inject_atomic call is an optimized version of fi_atomic. The
fi_inject_atomic function behaves as if the FI_INJECT transfer flag were set,
and FI_EVENT were not. That is, the data buffer is available for reuse
immediately on returning from fi_inject_atomic, and no completion event
will be generated for this atomic. The completion event will be suppressed
even if the endpoint has not been configured with FI_EVENT. See the flags
discussion below for more details.
.PP
The fi_inject_atomicto is equivalent to fi_inject_atomic for unconnected
endpoints.
.PP
The fi_atomicmsg call supports atomic functions over both connected and unconnected
endpoints, with the ability to control the atomic operation per call through the
use of flags. The fi_atomicmsg function takes a struct fi_msg_atomic as input.
.PP
.nf
struct fi_msg_atomic {
const struct fi_ioc *msg_iov; /* local scatter-gather array */
void **desc; /* local access descriptors */
size_t iov_count;/* # elements in ioc */
const void *addr; /* optional endpoint address */
const struct fi_rma_ioc *rma_iov; /* remote SGL */
size_t rma_iov_count;/* # elements in remote SGL */
enum fi_datatype datatype; /* operand datatype */
enum fi_op op; /* atomic operation */
void *context; /* user-defined context */
uint64_t data; /* optional data */
};
struct fi_rma_ioc {
uint64_t addr; /* target address */
size_t count; /* # target operands */
uint64_t key; /* access key */
};
.fi
.SS "Fetch-Atomic Functions"
The fetch atomic functions -- fi_fetch_atomic,
fi_fetch_atomicv, fi_fetch_atomicto, and
fi_fetch_atomicmsg -- behave similarly to the equivalent base atomic function.
The difference between the fetch and base atomic calls is that the fetch
atomic routines return the initial value that was stored at the target
to the user. The initial value is read into the user provided result
buffer. The target buffer of fetch-atomic operations must be enabled
for remote read access.
.PP
The following list of atomic operations are usable with both the base
atomic and fetch atomic operations: FI_MIN, FI_MAX, FI_SUM, FI_PROD,
FI_LOR, FI_LAND, FI_BOR, FI_BAND, FI_LXOR, FI_BXOR, FI_ATOMIC_READ,
and FI_ATOMIC_WRITE.
.SS "Compare-Atomic Functions"
The compare atomic functions -- fi_compare_atomic,
fi_compare_atomicv, fi_compare_atomicto, and
fi_compare_atomicmsg -- are used for operations that require comparing
the target data against a value before performing a swap operation.
The compare atomic functions support: FI_CSWAP, FI_CSWAP_NE, FI_CSWAP_LE,
FI_CSWAP_LT, FI_CSWAP_GE, FI_CSWAP_GT, and FI_MSWAP.
.SS "Atomic Valid Functions"
The atomic valid functions -- fi_atomicvalid, fi_fetch_atomicvalid,
and fi_compare_atomicvalid -- indicate which operations the local provider
supports. Needed operations not supported by the provider must be emulated
by the application. Each valid call corresponds to a set of atomic functions.
fi_atomicvalid checks whether a provider supports a specific base atomic
operation for a given datatype and operation. fi_fetch_atomicvalid indicates
if a provider supports a specific fetch-atomic operation for a given datatype
and operation. And fi_compare_atomicvalid checks if a provider supports a
specified compare-atomic operation for a given datatype and operation.
.PP
If an operation is supported, an atomic valid call will return 0, along with
a count of atomic data units that a single function call will operate on.
.SS "Completions"
Completed atomic operations are reported to the user through one or more event
collectors associated with the endpoint. Users provide a context that is
associated with each operation, and is returned to the user
as part of the event completion. See fi_eq for completion event details.
.PP
Updates to the target buffer of an atomic operation are visible to
processes running on the target system either after a completion has been
generated, or after the completion of an operation initiated after the
atomic call with a fencing operation occurring in between. For example,
the target process may be notified by the initiator sending a message
after the atomic call completes, or sending a fenced message immediately
after initiating the atomic operation.
.SH "FLAGS"
The fi_atomicmsg, fi_fetch_atomicmsg, and fi_compare_atomicmsg calls allow
the user to specify flags which can change the default data transfer operation.
Flags specified with atomic message operations override most flags
previously configured with the endpoint, except where noted (see fi_control).
The following list of flags are usable with atomic message calls.
.IP "FI_EVENT"
Indicates that a completion entry should be generated for the specified
operation. The endpoint must be bound to an event queue
with FI_EVENT that corresponds to the specified operation, or this flag
is ignored.
.IP "FI_MORE"
Indicates that the user has additional requests that will immediately be
posted after the current call returns. Use of this flag may improve
performance by enabling the provider to optimize its access to the fabric
hardware.
.IP "FI_REMOTE_SIGNAL"
Indicates that a completion event at the target process should be generated
for the given operation. The remote endpoint must be configured with
FI_REMOTE_SIGNAL, or this flag will be ignored by the target.
.IP "FI_INJECT"
Indicates that the outbound non-const data buffers (buf and compare parameters)
should be returned to the user immediately after the call returns, even if the
operation is handled asynchronously. This may require that the underlying
provider implementation copy the data into a local buffer and transfer out of
that buffer. The use of output result buffers is not affected by this flag.
.SH "RETURN VALUE"
Returns 0 on success. On error, a negative value corresponding to fabric
errno is returned. Fabric errno values are defined in
.IR "rdma/fi_errno.h".
.SH "ERRORS"
.IP "-FI_EOPNOTSUPP"
The requested atomic operation is not supported on this endpoint.
.IP "-FI_EMSGSIZE"
The number of atomic operations in a single request exceeds that supported
by the underlying provider.
.SH "NOTES"
.SH "SEE ALSO"
fi_getinfo(3), fi_endpoint(3), fi_domain(3), fi_eq(3), fi_rma(3)

Просмотреть файл

@ -1,299 +1,316 @@
.TH "FI_AV" 3 "2014-11-11" "Libfabric v0.0.2" "Libfabric Programmer's Manual" libfabric
.TH fi_av 3 "2014-11-14" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
fi_av \- Address vector operations
.PP
fi_av_open / fi_close
.RS
Open or close an address vector
.RE
fi_av - Address vector operations
.PP
fi_av_bind
.RS
Associate an address vector with an event queue.
.RE
fi_av_open / fi_close : Open or close an address vector
.PP
fi_av_insert / fi_av_insertsvc / fi_av_remove
.RS
Insert/remove an address into/from the address vector.
.RE
fi_av_bind : Associate an address vector with an event queue.
.PP
fi_av_lookup
.RS
Retrieve an address stored in the address vector.
.RE
fi_av_insert / fi_av_insertsvc / fi_av_remove : Insert/remove an address
into/from the address vector.
.PP
fi_av_straddr
.RS
Convert an address into a printable string.
.RE
fi_av_lookup : Retrieve an address stored in the address vector.
.PP
fi_av_straddr : Convert an address into a printable string.
.SH SYNOPSIS
.B #include <rdma/fi_domain.h>
.HP
.BI "int fi_av_open(struct fid_domain *" domain ", struct fi_av_attr *" attr ", "
.BI "struct fid_av **" av ", void * " context ");"
.HP
.BI "int fi_close(struct fid *" av ");"
.PP
.HP
.BI "int fi_av_bind(struct fid_av *" av ", struct fid *" eq ", uint64_t " flags ");"
.PP
.HP
.BI "int fi_av_insert(struct fid_av *" av ", void *" addr ", size_t " count ", "
.BI "fi_addr_t *" fi_addr ", uint64_t " flags ", void *" context ");"
.HP
.BI "int fi_av_insertsvc(struct fid_av *" av ", const char *" node ", "
.BI "const char *" service ", fi_addr_t *" fi_addr ", uint64_t " flags ", "
.BI "void *" context ");"
.HP
.BI "int fi_av_insertsym(struct fid_av *" av ", const char *" node ", "
.BI "size_t " nodecnt ", const char *" service ", size_t " svccnt ", "
.BI "fi_addr_t *" fi_addr ", uint64_t " flags ", void *" context ");"
.HP
.BI "int fi_av_remove(struct fid_av *" av ", fi_addr_t " fi_addr ", size_t " count ", "
.BI "uint64_t " flags ");"
.HP
.BI "int fi_av_lookup(struct fid_av *" av ", fi_addr_t " fi_addr ", "
.BI "void *" addr ", size_t *" addrlen ");"
.HP
.BI "fi_addr_t fi_rx_addr(fi_addr_t " fi_addr ", int " rx_index ", "
.BI "int " rx_ctx_bits ");"
.HP
.BI "const char * fi_av_straddr(struct fid_av *" av ", const void *" addr ", "
.BI "void *" buf ", size_t " len ");"
.IP
.nf
\f[C]
#include\ <rdma/fi_domain.h>
int\ fi_av_open(struct\ fid_domain\ *domain,\ struct\ fi_av_attr\ *attr,
\ \ \ \ struct\ fid_av\ **av,\ void\ *context);
int\ fi_close(struct\ fid\ *av);
int\ fi_av_bind(struct\ fid_av\ *av,\ struct\ fid\ *eq,\ uint64_t\ flags);
int\ fi_av_insert(struct\ fid_av\ *av,\ void\ *addr,\ size_t\ count,
\ \ \ \ fi_addr_t\ *fi_addr,\ uint64_t\ flags,\ void\ *context);
int\ fi_av_insertsvc(struct\ fid_av\ *av,\ const\ char\ *node,
\ \ \ \ const\ char\ *service,\ fi_addr_t\ *fi_addr,\ uint64_t\ flags,
\ \ \ \ void\ *context);
int\ fi_av_insertsym(struct\ fid_av\ *av,\ const\ char\ *node,
\ \ \ \ size_t\ nodecnt,\ const\ char\ *service,\ size_t\ svccnt,
\ \ \ \ fi_addr_t\ *fi_addr,\ uint64_t\ flags,\ void\ *context);
int\ fi_av_remove(struct\ fid_av\ *av,\ fi_addr_t\ fi_addr,\ size_t\ count,
\ \ \ \ uint64_t\ flags);
int\ fi_av_lookup(struct\ fid_av\ *av,\ fi_addr_t\ fi_addr,
\ \ \ \ void\ *addr,\ size_t\ *addrlen);
fi_addr_t\ fi_rx_addr(fi_addr_t\ fi_addr,\ int\ rx_index,
\ \ \ \ \ \ int\ rx_ctx_bits);
const\ char\ *\ fi_av_straddr(struct\ fid_av\ *av,\ const\ void\ *addr,
\ \ \ \ \ \ void\ *buf,\ size_t\ len);
\f[]
.fi
.SH ARGUMENTS
.IP "domain"
Resource domain
.IP "av"
Address vector
.IP "eq"
Event queue
.IP "attr"
Address vector attributes
.IP "context"
User specified context associated with the address vector or insert operation.
.IP "addr"
Buffer containing one or more addresses to insert into address vector.
.IP "addrlen"
On input, specifies size of addr buffer. On output, stores number of bytes
written to addr buffer.
.IP "fi_addr"
For insert, a reference to an array where returned fabric addresses
will be written.
.br
.PP
\f[I]domain\f[] : Resource domain
.PP
\f[I]av\f[] : Address vector
.PP
\f[I]eq\f[] : Event queue
.PP
\f[I]attr\f[] : Address vector attributes
.PP
\f[I]context\f[] : User specified context associated with the address
vector or insert operation.
.PP
\f[I]addr\f[] : Buffer containing one or more addresses to insert into
address vector.
.PP
\f[I]addrlen\f[] : On input, specifies size of addr buffer.
On output, stores number of bytes written to addr buffer.
.PP
\f[I]fi_addr\f[] : For insert, a reference to an array where returned
fabric addresses will be written.
For remove, one or more fabric addresses to remove.
.IP "count"
Number of addresses to insert/remove from an AV.
.IP "flags"
Additional flags to apply to the operation.
.SH "DESCRIPTION"
.PP
\f[I]count\f[] : Number of addresses to insert/remove from an AV.
.PP
\f[I]flags\f[] : Additional flags to apply to the operation.
.SH DESCRIPTION
.PP
Address vectors are used to map higher level addresses, which may be
more natural for an application to use, into fabric specific addresses.
The mapping of addresses is fabric and provider specific, but may involve
lengthy address resolution and fabric management protocols. AV operations
are synchronous by default, but may be set to operate asynchronously by
specifying the FI_EVENT flag to
.B fi_av_open.
When requesting asynchronous operation, the
application must first bind an event queue to the AV before inserting
addresses.
.SS "fi_av_open"
fi_av_open allocates or opens an address vector. The properties and behavior of
the address vector are defined by struct fi_av_attr.
The mapping of addresses is fabric and provider specific, but may
involve lengthy address resolution and fabric management protocols.
AV operations are synchronous by default, but may be set to operate
asynchronously by specifying the FI_EVENT flag to \f[C]fi_av_open\f[].
When requesting asynchronous operation, the application must first bind
an event queue to the AV before inserting addresses.
.SS fi_av_open
.PP
fi_av_open allocates or opens an address vector.
The properties and behavior of the address vector are defined by
\f[C]struct\ fi_av_attr\f[].
.IP
.nf
struct fi_av_attr {
enum fi_av_type type; /* type of AV */
int rx_ctx_bits; /* address bits to identify rx ctx */
size_t count; /* # entries for AV */
size_t ep_per_node; /* # endpoints per fabric address */
const char *name; /* system name of AV */
void *map_addr; /* base mmap address */
uint64_t flags; /* operation flags */
\f[C]
struct\ fi_av_attr\ {
\ \ \ \ enum\ fi_av_type\ \ type;\ \ \ \ \ \ \ \ /*\ type\ of\ AV\ */
\ \ \ \ int\ \ \ \ \ \ \ \ \ \ \ \ \ \ rx_ctx_bits;\ /*\ address\ bits\ to\ identify\ rx\ ctx\ */
\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ count;\ \ \ \ \ \ \ /*\ #\ entries\ for\ AV\ */
\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ ep_per_node;\ /*\ #\ endpoints\ per\ fabric\ address\ */
\ \ \ \ const\ char\ \ \ \ \ \ \ *name;\ \ \ \ \ \ \ /*\ system\ name\ of\ AV\ */
\ \ \ \ void\ \ \ \ \ \ \ \ \ \ \ \ \ *map_addr;\ \ \ /*\ base\ mmap\ address\ */
\ \ \ \ uint64_t\ \ \ \ \ \ \ \ \ flags;\ \ \ \ \ \ \ /*\ operation\ flags\ */
};
\f[]
.fi
.IP "type"
An AV type corresponds to a conceptual implementation of an address vector.
.PP
\f[I]type\f[] : An AV type corresponds to a conceptual implementation of
an address vector.
The type specifies how an application views data stored in the AV,
including how it may be accessed. Valid values are:
.RS
.IP "FI_AV_MAP"
Addresses which are inserted into an AV are mapped to a native fabric
address for use by the application. The use of FI_AV_MAP requires that
an application store the returned fi_addr_t value that is associated with
each inserted address. The advantage of using FI_AV_MAP is that the returned
fi_addr_t value may contain encoded address data, which is immediately
available when processing data transfer requests. This can eliminate
or reduce the number of memory lookups needed when initiating a transfer.
including how it may be accessed.
Valid values are:
.IP \[bu] 2
\f[I]FI_AV_MAP\f[] : Addresses which are inserted into an AV are mapped
to a native fabric address for use by the application.
The use of FI_AV_MAP requires that an application store the returned
fi_addr_t value that is associated with each inserted address.
The advantage of using FI_AV_MAP is that the returned fi_addr_t value
may contain encoded address data, which is immediately available when
processing data transfer requests.
This can eliminate or reduce the number of memory lookups needed when
initiating a transfer.
The disadvantage of FI_AV_MAP is the increase in memory usage needed to
store the returned addresses.
Addresses are stored in the AV using a provider specific
mechanism, including, but not limited to a tree, hash table, or maintained
on the heap.
.IP "FI_AV_TABLE"
Addresses which are inserted into an AV of type FI_AV_TABLE are accessible
using a simple index. Conceptually, the AV may be treated as an array
of addresses, though the provider may implement the AV using a variety
of mechanisms. When FI_AV_TABLE is used, the returned fi_addr_t is an
index, with the index for an inserted address the same as its
insertion order into the table.
The index of the first address inserted into an FI_AV_TABLE will be 0, and
successive insertions will be given sequential indices.
Sequential indices will be assigned across insertion calls on the same AV.
.RE
.IP "Receive Context Bits (rx_ctx_bits)"
The receive context bits field is only for use with scalable endpoints. It
indicates the number of bits reserved in a returned fi_addr_t, which will
be used to identify a specific target receive context. See fi_rx_addr()
and fi_endpoint(3) for additional details on receive contexts. The
requested number of bits should be selected such that 2 ^ rx_ctx_bits >=
rx_ctx_cnt for the endpoint.
.IP "count"
Indicates the expected number of addresses that will be inserted into
the AV. The provider uses this to optimize resource allocations.
.IP "ep_per_node"
This field indicates the number of endpoints that will be associated
with a specific fabric, or network, address. If the number of endpoints
per node is unknown, this value should be set to 0.
Addresses are stored in the AV using a provider specific mechanism,
including, but not limited to a tree, hash table, or maintained on the
heap.
.IP \[bu] 2
\f[I]FI_AV_TABLE\f[] : Addresses which are inserted into an AV of type
FI_AV_TABLE are accessible using a simple index.
Conceptually, the AV may be treated as an array of addresses, though the
provider may implement the AV using a variety of mechanisms.
When FI_AV_TABLE is used, the returned fi_addr_t is an index, with the
index for an inserted address the same as its insertion order into the
table.
The index of the first address inserted into an FI_AV_TABLE will be 0,
and successive insertions will be given sequential indices.
Sequential indices will be assigned across insertion calls on the same
AV.
.PP
\f[I]Receive Context Bits (rx_ctx_bits)\f[] : The receive context bits
field is only for use with scalable endpoints.
It indicates the number of bits reserved in a returned fi_addr_t, which
will be used to identify a specific target receive context.
See fi_rx_addr() and fi_endpoint(3) for additional details on receive
contexts.
The requested number of bits should be selected such that 2 ^
rx_ctx_bits >= rx_ctx_cnt for the endpoint.
.PP
\f[I]count\f[] : Indicates the expected number of addresses that will be
inserted into the AV.
The provider uses this to optimize resource allocations.
.PP
\f[I]ep_per_node\f[] : This field indicates the number of endpoints that
will be associated with a specific fabric, or network, address.
If the number of endpoints per node is unknown, this value should be set
to 0.
The provider uses this value to optimize resource allocations.
For example, distributed, parallel applications may set this to the number
of processes allocated per node, times the number of endpoints each process
will open.
.IP "name"
An optional system name associated with the address vector to create or open.
Address vectors may be shared across multiple processes which access
the same named domain on the same node. The name field allows the
underlying provider to identify a shared AV.
.sp
For example, distributed, parallel applications may set this to the
number of processes allocated per node, times the number of endpoints
each process will open.
.PP
\f[I]name\f[] : An optional system name associated with the address
vector to create or open.
Address vectors may be shared across multiple processes which access the
same named domain on the same node.
The name field allows the underlying provider to identify a shared AV.
.PP
If the name field is non-NULL and the AV is not opened for read-only
access, a named AV will be created, if it does not already exist.
.IP "map_addr"
The map_addr determines the base FI_ADDR address that a provider
should use when sharing an AV of type FI_AV_MAP between processes.
.PP
\f[I]map_addr\f[] : The map_addr determines the base FI_ADDR address
that a provider should use when sharing an AV of type FI_AV_MAP between
processes.
Processes that provide the same value for map_addr to a shared AV may
use the same FI_ADDR values returned from an fi_av_insert call.
.sp
.PP
The map_addr may be used by the provider to mmap memory allocated for a
shared AV between processes; however, the provider is not required to use
the map_addr in this fashion. The only requirement is that an FI_ADDR
returned as part of an fi_av_insert call on one process is usable on
another process which opens an AV of the same name at the same map_addr
value. The relationship between the map_addr and any returned FI_ADDR
is not defined.
.sp
shared AV between processes; however, the provider is not required to
use the map_addr in this fashion.
The only requirement is that an FI_ADDR returned as part of an
fi_av_insert call on one process is usable on another process which
opens an AV of the same name at the same map_addr value.
The relationship between the map_addr and any returned FI_ADDR is not
defined.
.PP
If name is non-NULL and map_addr is 0, then the map_addr used by the
provider will be returned through the attribute structure. The map_addr
field is ignored if name is NULL.
.IP "flags"
The following flags may be used when opening an AV.
.RS
.IP "FI_EVENT"
When the flag FI_EVENT is specified, all insert operations on this
AV will occur
asynchronously. There will be one EQ error entry generated for each
failed address insertion, followed by one non-error event indicating that the
insertion operation has completed.
There will always be one non-error completion event for each
insert operation, even if all addresses fail. The context field in all
completions will be the context specified to the insert call, and the data
field in the final completion entry will report the number of addresses
successfully inserted.
.sp
If an AV is opened with FI_EVENT, any insertions attempted before an EQ
is bound to the AV will fail with -FI_ENOEQ.
.sp
Error completions for failed insertions will contain the index of the failed
address in the index field of the error completion entry.
.sp
Note that the order of delivery of insert completions may not match
the order in which the calls to fi_av_insert were made. The only guarantee
is that all error completions for a given call to fi_av_insert will precede
the single associated non-error completion.
.IP "FI_READ"
Opens an AV for read-only access. An AV opened for read-only access
must be named (name attribute specified), and the AV must exist.
.IP "FI_SYMMETRIC"
Indicates that each node will be associated with the
same number of endpoints, the same transport addresses will be allocated
on each node, and the transport addresses will be sequential. This feature
targets distributed applications on large fabrics and allows for
highly-optimized storage of remote endpoint addressing.
.RE
.SS "fi_close"
provider will be returned through the attribute structure.
The map_addr field is ignored if name is NULL.
.PP
\f[I]flags\f[] : The following flags may be used when opening an AV.
.IP \[bu] 2
\f[I]FI_EVENT\f[] : When the flag FI_EVENT is specified, all insert
operations on this AV will occur asynchronously.
There will be one EQ error entry generated for each failed address
insertion, followed by one non-error event indicating that the insertion
operation has completed.
There will always be one non-error completion event for each insert
operation, even if all addresses fail.
The context field in all completions will be the context specified to
the insert call, and the data field in the final completion entry will
report the number of addresses successfully inserted.
If an AV is opened with FI_EVENT, any insertions attempted before
an EQ is bound to the AV will fail with -FI_ENOEQ.
Error completions for failed insertions will contain the index of
the failed address in the index field of the error completion entry.
Note that the order of delivery of insert completions may not
match the order in which the calls to fi_av_insert were made.
The only guarantee is that all error completions for a given call to
fi_av_insert will precede the single associated non-error completion.
.IP \[bu] 2
\f[I]FI_READ\f[] : Opens an AV for read-only access.
An AV opened for read-only access must be named (name attribute
specified), and the AV must exist.
.IP \[bu] 2
\f[I]FI_SYMMETRIC\f[] : Indicates that each node will be associated with
the same number of endpoints, the same transport addresses will be
allocated on each node, and the transport addresses will be sequential.
This feature targets distributed applications on large fabrics and
allows for highly-optimized storage of remote endpoint addressing.
.SS fi_close
.PP
The fi_close call is used to release all resources associated with an
address vector. Note that any events queued on an event queue referencing
the AV are left untouched. It is recommended that callers retrieve all
events associated with the AV before closing it.
.SS "fi_av_bind"
Associates an event queue with the AV. If an AV has been opened with
.B FI_EVENT,
then an event queue must be bound to the AV before any insertion
calls are attempted. Any calls to insert addresses before an event queue
has been bound will fail with -FI_ENOEQ.
.SS "fi_av_insert"
The fi_av_insert call inserts zero or more addresses into an AV. The number
of addresses is specified through the count parameter. The addr parameter
references an array of addresses to insert into the AV. Addresses
inserted into an address vector must be in the same format as specified
in struct fi_info:addr_format for the corresponding domain. A NULL
value for an address may be used to indicate that an entry should be
associated with 'any' address (similar to the IPv4 address of 0.0.0.0).
address vector.
Note that any events queued on an event queue referencing the AV are
left untouched.
It is recommended that callers retrieve all events associated with the
AV before closing it.
.SS fi_av_bind
.PP
For AVs of type FI_AV_MAP, once inserted addresses have been mapped,
the mapped values are written into the buffer referenced by fi_addr.
The fi_addr buffer must remain valid until the
AV insertion has completed and an event has been generated
to an associated event queue. The value of the returned fi_addr should
be considered opaque by the application for AVs of type FI_AV_MAP.
Associates an event queue with the AV.
If an AV has been opened with \f[C]FI_EVENT\f[], then an event queue
must be bound to the AV before any insertion calls are attempted.
Any calls to insert addresses before an event queue has been bound will
fail with \f[C]-FI_ENOEQ\f[].
.SS fi_av_insert
.PP
The fi_av_insert call inserts zero or more addresses into an AV.
The number of addresses is specified through the count parameter.
The addr parameter references an array of addresses to insert into the
AV.
Addresses inserted into an address vector must be in the same format as
specified in struct fi_info:addr_format for the corresponding domain.
A NULL value for an address may be used to indicate that an entry should
be associated with \[aq]any\[aq] address (similar to the IPv4 address of
0.0.0.0).
.PP
For AVs of type FI_AV_MAP, once inserted addresses have been
mapped, the mapped values are written into the buffer referenced by
fi_addr.
The fi_addr buffer must remain valid until the AV insertion has
completed and an event has been generated to an associated event queue.
The value of the returned fi_addr should be considered opaque by the
application for AVs of type FI_AV_MAP.
The returned value may point to an internal structure or a provider
specific encoding of low-level addressing data, for example. In the latter
case, use of FI_AV_MAP may be able to avoid memory references during data
transfer operations.
specific encoding of low-level addressing data, for example.
In the latter case, use of FI_AV_MAP may be able to avoid memory
references during data transfer operations.
.PP
For AVs of type FI_AV_TABLE, addresses are placed into the table in
order.
That is, the first address inserted may be referenced at index 0.
The fi_addr parameter may be NULL in this case.
Otherwise, fi_addr must reference an array of fi_addr_t, and the buffer
must remain valid until the insertion operation completes.
Note that if fi_addr is NULL and synchronous operation is requested,
individual insertion failures cannot be reported and the application
must use other calls, such as \f[C]fi_av_lookup\f[] to learn which
specific addresses failed to insert.
When addresses are inserted into an AV of type FI_AV_TABLE, the assigned
fi_addr values will be simple indices corresponding to the entry into
the table where the address was inserted.
Addresses are indexed in order of their insertion.
Index values accumulate across successive insert calls in the order the
calls are made, not necessarily in the order the insertions complete.
.PP
\f[I]flags\f[] : The following flag may be passed to fi_av_insert
.IP \[bu] 2
\f[I]FI_MORE\f[] : In order to allow optimized address insertion, the
application may specify the FI_MORE flag to the insert call to give a
hint to the provider that more insertion requests will follow, allowing
the provider to aggregate insertion requests if desired.
Providers are free to ignore FI_MORE.
.SS fi_av_insertsvc
.PP
For AVs of type FI_AV_TABLE, addresses are placed into the table in
order. That is, the first address inserted may be referenced at
index 0. The fi_addr parameter may be NULL in this case. Otherwise,
fi_addr must reference an array of fi_addr_t, and the buffer must
remain valid until the insertion operation completes. Note that if fi_addr
is NULL and synchronous operation is requested, individual insertion failures
cannot be reported and the application must use other calls, such as
.B fi_av_lookup
to learn which specific addresses failed to insert.
When addresses
are inserted into an AV of type FI_AV_TABLE, the assigned fi_addr values
will be simple indices corresponding to the entry into the table where the
address was inserted. Addresses are indexed in order of their insertion.
Index values accumulate across successive insert calls in the order the calls
are made, not necessarily in the order the insertions complete.
.IP "flags"
The following flag may be passed to fi_av_insert
.RS
.IP "FI_MORE"
In order to allow optimized address insertion, the application may
specify the FI_MORE flag to the insert call to give a hint to the provider
that more insertion requests will follow, allowing the provider to aggregate
insertion requests if desired. Providers are free to ignore FI_MORE.
.RE
.SS "fi_av_insertsvc"
The fi_av_insertsvc call behaves similar to fi_av_insert, but allows the
application to specify the node and service names, similar to the
fi_getinfo inputs, rather than an encoded address. The node and service
parameters are defined the same as fi_getinfo(3). Node should be a string
that corresponds to a hostname or network address. The service string
corresponds to a textual representation of a transport address.
.SS "fi_av_insertsym"
fi_av_insertsym performs a symmetric insert that inserts a sequential
range of nodes and/or service addresses into an AV. The svccnt parameter
indicates the number of transport (endpoint) addresses to insert into the AV
for each node address, with the service parameter specifying the starting
transport address. Inserted transport addresses will be of the range
{service, service + svccnt - 1}, inclusive. All service addresses for a
node will be inserted before the next node is inserted.
fi_getinfo inputs, rather than an encoded address.
The node and service parameters are defined the same as fi_getinfo(3).
Node should be a string that corresponds to a hostname or network
address.
The service string corresponds to a textual representation of a
transport address.
.SS fi_av_insertsym
.PP
The nodecnt parameter indicates the number of node (network) addresses to
insert into the AV, with the node parameter specifying the starting
node address. Inserted node addresses will be of the range
{node, node + nodecnt - 1}, inclusive. If node is a non-numeric string,
such as a hostname, it must contain a numeric suffix if nodecnt > 1.
fi_av_insertsym performs a symmetric insert that inserts a sequential
range of nodes and/or service addresses into an AV.
The svccnt parameter indicates the number of transport (endpoint)
addresses to insert into the AV for each node address, with the service
parameter specifying the starting transport address.
Inserted transport addresses will be of the range {service, service +
svccnt - 1}, inclusive.
All service addresses for a node will be inserted before the next node
is inserted.
.PP
The nodecnt parameter indicates the number of node (network) addresses
to insert into the AV, with the node parameter specifying the starting
node address.
Inserted node addresses will be of the range {node, node + nodecnt - 1},
inclusive.
If node is a non-numeric string, such as a hostname, it must contain a
numeric suffix if nodecnt > 1.
.PP
As an example, if node = "10.1.1.1", nodecnt = 2, service = "5000", and
svccnt = 2, the following addresses will be inserted into the AV in the
@ -302,60 +319,75 @@ If node were replaced by the hostname "host10", the addresses would be:
host10:5000, host10:5001, host11:5000, host11:5001.
.PP
The total number of inserted addresses will be nodecnt x svccnt.
.SS "fi_av_remove"
fi_av_remove removes a set of addresses from an address vector. All
resources associated with the indicated addresses are released, and
.SS fi_av_remove
.PP
fi_av_remove removes a set of addresses from an address vector.
All resources associated with the indicated addresses are released, and
no future references to either the mapped address (in the case of
FI_AV_MAP) or index (FI_AV_TABLE) are allowed.
.PP
The use of fi_av_remove is an optimization that applications may use
to free memory allocated with addresses that will no longer be accessed.
Inserted addresses are not required to be removed. fi_av_close will
automatically clean up any resources associated with addresses remaining
in the AV when it is invoked.
.SS "fi_av_lookup"
This call returns the address stored in the address vector that corresponds
to the given fi_addr. The returned address is the same format as those
stored by the AV. On input, the addrlen parameter should
indicate the size of the addr buffer. If the actual address is larger than
what can fit into the buffer, it will be truncated. On output, addrlen
is set to the size of the buffer needed to store the address, which may
be larger than the input value.
.SS "fi_rx_addr"
This function is used to convert an endpoint address, returned by fi_av_insert,
into an address that specifies a target receive context. The specified
fi_addr parameter must either be a value returned from fi_av_insert, in the
case of FI_AV_MAP, or an index, in the case of FI_AV_TABLE. The value for
rx_ctx_bits must match that specified in the AV attributes for the given
address.
.P
The use of fi_av_remove is an optimization that applications may use to
free memory allocated with addresses that will no longer be accessed.
Inserted addresses are not required to be removed.
fi_av_close will automatically clean up any resources associated with
addresses remaining in the AV when it is invoked.
.SS fi_av_lookup
.PP
This call returns the address stored in the address vector that
corresponds to the given fi_addr.
The returned address is the same format as those stored by the AV.
On input, the addrlen parameter should indicate the size of the addr
buffer.
If the actual address is larger than what can fit into the buffer, it
will be truncated.
On output, addrlen is set to the size of the buffer needed to store the
address, which may be larger than the input value.
.SS fi_rx_addr
.PP
This function is used to convert an endpoint address, returned by
fi_av_insert, into an address that specifies a target receive context.
The specified fi_addr parameter must either be a value returned from
fi_av_insert, in the case of FI_AV_MAP, or an index, in the case of
FI_AV_TABLE.
The value for rx_ctx_bits must match that specified in the AV attributes
for the given address.
.PP
Connected endpoints that support multiple receive contexts, but are not
associated with address vectors should specify FI_ADDR_NOTAVAIL for the
fi_addr parameter.
.SS "fi_av_straddr"
The fi_av_straddr function converts the provided address into a printable
string. The specified address must be of the same format as those
stored by the AV, though the address itself is not required to have been
inserted. On input, the len parameter should specify the size of the buffer
referenced by buf. On output, the actual size needed to write the entire
string will be returned. This size may be larger than the input len. If
the provided buffer is too small, the results will be truncated. fi_av_straddr
returns a pointer to buf.
.SH "NOTES"
Providers may implement AVs using a variety of mechanisms. Specifically,
a provider may begin resolving inserted addresses as soon as they have
been added to an AV, even if asynchronous operation has been specified.
Similarly, a provider may lazily release resources from removed entries.
.SH "RETURN VALUES"
.SS fi_av_straddr
.PP
The fi_av_straddr function converts the provided address into a
printable string.
The specified address must be of the same format as those stored by the
AV, though the address itself is not required to have been inserted.
On input, the len parameter should specify the size of the buffer
referenced by buf.
On output, the actual size needed to write the entire string will be
returned.
This size may be larger than the input len.
If the provided buffer is too small, the results will be truncated.
fi_av_straddr returns a pointer to buf.
.SH NOTES
.PP
Providers may implement AVs using a variety of mechanisms.
Specifically, a provider may begin resolving inserted addresses as soon
as they have been added to an AV, even if asynchronous operation has
been specified.
Similarly, a provider may lazily release resources from removed entries.
.SH RETURN VALUES
.PP
The insert calls return the number of addresses successfully inserted or
the number of asynchronous insertions initiated if FI_EVENT is set.
.PP
Other calls return 0 on success.
.PP
On error, a negative value corresponding to
fabric errno is returned.
Fabric errno values are defined in
.IR "rdma/fi_errno.h".
.SH "ERRORS"
.SH "SEE ALSO"
fi_getinfo(3), fi_endpoint(3), fi_domain(3), fi_eq(3)
On error, a negative value corresponding to fabric errno is returned.
Fabric errno values are defined in \f[C]rdma/fi_errno.h\f[].
.SH ERRORS
.SH SEE ALSO
.PP
\f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3),
\f[C]fi_eq\f[](3)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -1,177 +1,205 @@
.TH "FI_CM" 3 "2014-11-11" "Libfabric v0.0.2" "Libfabric Programmer's Manual" libfabric
.TH fi_cm 3 "2014\-12\-03" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_cm - Connection management operations
.PP
fi_connect / fi_listen / fi_accept / fi_reject / fi_shutdown
.RS
Manage endpoint connection state.
.RE
fi_connect / fi_listen / fi_accept / fi_reject / fi_shutdown : Manage
endpoint connection state.
.PP
fi_getname / fi_getpeer
.RS
Return local or peer endpoint address
.RE
fi_getname / fi_getpeer : Return local or peer endpoint address
.PP
fi_join / fi_leave
.RS
Have an endpoint join or leave a multicast group.
.RE
fi_join / fi_leave : Have an endpoint join or leave a multicast group.
.SH SYNOPSIS
.B #include <rdma/fi_cm.h>
.HP
.BI "int fi_connect(struct fid_ep *" ep ", const void *" addr ","
.BI "const void * " param ", size_t " paramlen ");"
.HP
.BI "int fi_listen(struct fid_pep *" pep ");"
.HP
.BI "int fi_accept(struct fid_ep *" ep ", fi_connreq_t " connreq ","
.BI "const void *" param ", size_t " paramlen ");"
.HP
.BI "int fi_reject(struct fid_pep *" pep ", fi_connreq_t " connreq ","
.BI "const void *" param ", size_t " paramlen ");"
.HP
.BI "int fi_shutdown(struct fid_ep *" ep ", uint64_t " flags ");"
.PP
.HP
.BI "int fi_getname(fid_t " fid ", void *" addr ", size_t *" addrlen ");"
.HP
.BI "int fi_getpeer(struct fid_ep *" ep ", void *" addr ", size_t *" addrlen ");"
.PP
.HP
.BI "int fi_join(struct fid_ep *" ep ", void *" addr ", fi_addr_t *" fi_addr ","
.BI "uint64_t " flags ", void *" context ");"
.HP
.BI "int fi_leave(struct fid_ep *" ep ", void *" addr ", fi_addr_t " fi_addr ","
.BI "uint64_t " flags ");"
.IP
.nf
\f[C]
#include\ <rdma/fi_cm.h>
int\ fi_connect(struct\ fid_ep\ *ep,\ const\ void\ *addr,
\ \ \ \ const\ void\ *param,\ size_t\ paramlen);
int\ fi_listen(struct\ fid_pep\ *pep);
int\ fi_accept(struct\ fid_ep\ *ep,\ const\ void\ *param,\ size_t\ paramlen);
int\ fi_reject(struct\ fid_pep\ *pep,\ fi_connreq_t\ connreq,
\ \ \ \ const\ void\ *param,\ size_t\ paramlen);
int\ fi_shutdown(struct\ fid_ep\ *ep,\ uint64_t\ flags);
int\ fi_getname(fid_t\ fid,\ void\ *addr,\ size_t\ *addrlen);
int\ fi_getpeer(struct\ fid_ep\ *ep,\ void\ *addr,\ size_t\ *addrlen);
int\ fi_join(struct\ fid_ep\ *ep,\ void\ *addr,\ fi_addr_t\ *fi_addr,
\ \ \ \ uint64_t\ flags,\ void\ *context);
int\ fi_leave(struct\ fid_ep\ *ep,\ void\ *addr,\ fi_addr_t\ fi_addr,
\ \ \ \ uint64_t\ flags);
\f[]
.fi
.SH ARGUMENTS
.IP "ep / pep"
Fabric endpoint on which to change connection state.
.IP "addr"
Buffer to store queried address (get), or address to connect/join/leave.
.PP
\f[I]ep / pep\f[] : Fabric endpoint on which to change connection state.
.PP
\f[I]addr\f[] : Buffer to store queried address (get), or address to
connect/join/leave.
The address must be in the same format as that specified using fi_info:
addr_format when the endpoint was created.
.IP "addrlen"
On input, specifies size of addr buffer. On output, stores number of bytes
written to addr buffer.
.IP "param"
User-specified data exchanged as part of the connection exchange.
.IP "paramlen"
Size of param buffer.
.IP "info"
Fabric information associated with a connection request.
.IP "fi_addr"
Fabric address associated with a multicast address.
.IP "flags"
Additional flags for controlling connection operation.
.IP "context"
User context associated with the request.
.SH "DESCRIPTION"
.PP
\f[I]addrlen\f[] : On input, specifies size of addr buffer.
On output, stores number of bytes written to addr buffer.
.PP
\f[I]param\f[] : User-specified data exchanged as part of the connection
exchange.
.PP
\f[I]paramlen\f[] : Size of param buffer.
.PP
\f[I]info\f[] : Fabric information associated with a connection request.
.PP
\f[I]fi_addr\f[] : Fabric address associated with a multicast address.
.PP
\f[I]flags\f[] : Additional flags for controlling connection operation.
.PP
\f[I]context\f[] : User context associated with the request.
.SH DESCRIPTION
.PP
Connection management functions are used to connect an endpoint to a
remote address (in the case of a connectionless endpoint) or a peer
endpoint (for connection-oriented endpoints).
.SS "fi_listen"
.SS fi_listen
.PP
The fi_listen call indicates that the specified endpoint should be
transitioned into a passive connection state, allowing it to accept
incoming connection requests. Connection requests against a listening
endpoint are reported asynchronously to the user through a bound CM
event queue using the FI_CONNREQ event type. The number of outstanding
connection requests that can be queued at an endpoint is limited by the
listening endpoint's backlog parameter. The backlog is initialized
based on administrative configuration values, but may be adjusted
through the fi_control call.
.SS "fi_connect"
incoming connection requests.
Connection requests against a listening endpoint are reported
asynchronously to the user through a bound CM event queue using the
FI_CONNREQ event type.
The number of outstanding connection requests that can be queued at an
endpoint is limited by the listening endpoint\[aq]s backlog parameter.
The backlog is initialized based on administrative configuration values,
but may be adjusted through the fi_control call.
.SS fi_connect
.PP
For a connection-oriented endpoint, fi_connect initiates a connection
request to the destination address. For a connectionless endpoint,
fi_connect specifies the destination address that future data transfer
operations will target. This avoids the need for the user to specify the
address as part of the data transfer.
.SS "fi_accept / fi_reject"
request to the destination address.
For a connectionless endpoint, fi_connect specifies the destination
address that future data transfer operations will target.
This avoids the need for the user to specify the address as part of the
data transfer.
.SS fi_accept / fi_reject
.PP
The fi_accept and fi_reject calls are used on the passive (listening)
side of a connection to accept or reject a connection request,
respectively. To accept a connection, the listening application first
waits for a connection request event. After receiving such an event, it
allocates a new endpoint to accept the connection. fi_accept is invoked
with the newly allocated endpoint passed in as the fid parameter. If
the listening application wishes to reject a connection request, it calls
fi_reject with the listening endpoint passed in as the fid.
fi_reject takes a reference to the connection request as an input parameter.
respectively.
To accept a connection, the listening application first waits for a
connection request event (FI_CONNREQ).
After receiving such an event, the application allocates a new endpoint
to accept the connection.
This endpoint must be allocated using an fi_info structure referencing
the connreq from this FI_CONNREQ event.
fi_accept is then invoked with the newly allocated endpoint.
If the listening application wishes to reject a connection request, it
calls fi_reject with the listening endpoint and a reference to the
connection request.
.PP
A successfully accepted connection request will result in the active
(connecting) endpoint seeing an FI_CONNECTED event on its associated
event queue. A rejected or failed connection request will generate an
error event. The error entry will provide additional details describing
the reason for the failed attempt.
event queue.
A rejected or failed connection request will generate an error event.
The error entry will provide additional details describing the reason
for the failed attempt.
.PP
An FI_CONNECTED event will also be generated on the passive side for the
accepting endpoint once the connection has been properly established.
The fid of the FI_CONNECTED event will be that of the endpoint passed to
fi_accept as opposed to the listening passive endpoint.
Outbound data transfers cannot be initiated on a connection-oriented
endpoint until an FI_CONNECTED event has been generated. However, receive
buffers may be associated with an endpoint anytime.
endpoint until an FI_CONNECTED event has been generated.
However, receive buffers may be associated with an endpoint anytime.
.PP
For connection-oriented endpoints, the param buffer will be sent as part
of the connection request or response, subject to the constraints of the
underlying connection protocol.
Applications may use fi_control to determine the size of application
data that may be exchanged as part of a connection request or response.
The fi_connect, fi_accept, and fi_reject calls will silently truncate
any application data which cannot fit into underlying protocol messages.
.SS fi_shutdown
.PP
For connection-oriented endpoints, the param buffer will be sent as
part of the connection request or response, subject to the constraints of
the underlying connection protocol. Applications may use fi_control
to determine the size of application data that may be exchanged as
part of a connection request or response. The fi_connect, fi_accept, and
fi_reject calls will silently truncate any application data which cannot
fit into underlying protocol messages.
.SS "fi_shutdown"
The fi_shutdown call is used to gracefully disconnect an endpoint from
its peer. If shutdown flags are 0, the endpoint is fully disconnected,
and no additional data transfers will be possible. Flags may also be
used to indicate that only outbound (FI_WRITE) or inbound (FI_READ) data
transfers should be disconnected. Regardless of the shutdown option
selected, any queued completions associated with asynchronous operations
may still be retrieved from the corresponding event queues.
its peer.
If shutdown flags are 0, the endpoint is fully disconnected, and no
additional data transfers will be possible.
Flags may also be used to indicate that only outbound (FI_WRITE) or
inbound (FI_READ) data transfers should be disconnected.
Regardless of the shutdown option selected, any queued completions
associated with asynchronous operations may still be retrieved from the
corresponding event queues.
.PP
An FI_SHUTDOWN event will be generated for an endpoint when the remote
peer issues a disconnect using fi_shutdown or abruptly closes the endpoint.
.SS "fi_getname / fi_getpeer"
peer issues a disconnect using fi_shutdown or abruptly closes the
endpoint.
.SS fi_getname / fi_getpeer
.PP
The fi_getname and fi_getpeer calls may be used to retrieve the local or
peer endpoint address, respectively. On input, the addrlen parameter should
indicate the size of the addr buffer. If the actual address is larger than
what can fit into the buffer, it will be truncated. On output, addrlen
is set to the size of the buffer needed to store the address, which may
be larger than the input value.
.SS "fi_join / fi_leave"
peer endpoint address, respectively.
On input, the addrlen parameter should indicate the size of the addr
buffer.
If the actual address is larger than what can fit into the buffer, it
will be truncated.
On output, addrlen is set to the size of the buffer needed to store the
address, which may be larger than the input value.
.SS fi_join / fi_leave
.PP
fi_join and fi_leave are used to associate or dissociate an endpoint with
a multicast group. Join operations complete asynchronously, with the
completion reported through the event queue associated with the endpoint
or domain, if an event queue has not been bound to the endpoint.
.sp
A fabric address will be provided as part of the join request. The
address will be written to the memory location referenced by the fi_addr
parameter. This address must be used when issuing data transfer
operations to the multicast group. Because join operations are asynchronous,
the memory location referenced by the fi_addr parameter
must remain valid until an event associated with the join is reported,
or a corresponding call to leave the multicast group returns. Fi_addr is
not guaranteed to be set upon return from fi_join, and it is strongly
recommended that fi_addr not be declared on the stack, as data corruption
may result.
.sp
The fi_leave call will result in an endpoint leaving a multicast group. The
fi_leave call may be called even if the join operation has not completed, in
which case the join will be canceled if it has not yet completed.
.SH "FLAGS"
a multicast group.
Join operations complete asynchronously, with the completion reported
through the event queue associated with the endpoint or domain, if an
event queue has not been bound to the endpoint.
.PP
A fabric address will be provided as part of the join request.
The address will be written to the memory location referenced by the
fi_addr parameter.
This address must be used when issuing data transfer operations to the
multicast group.
Because join operations are asynchronous, the memory location referenced
by the fi_addr parameter must remain valid until an event associated
with the join is reported, or a corresponding call to leave the
multicast group returns.
Fi_addr is not guaranteed to be set upon return from fi_join, and it is
strongly recommended that fi_addr not be declared on the stack, as data
corruption may result.
.PP
The fi_leave call will result in an endpoint leaving a multicast group.
The fi_leave call may be called even if the join operation has not
completed, in which case the join will be canceled if it has not yet
completed.
.SH FLAGS
.PP
The fi_join call allows the user to specify flags requesting the type of
join operation being requested. Flags for fi_leave must be 0.
.IP "FI_SEND"
Setting FI_SEND, but not FI_RECV, indicates that the endpoint should
join the multicast group as a send-only member. If FI_RECV is also set
or neither FI_SEND nor FI_RECV are set, then the endpoint will join the
group with send and receive capabilities.
.IP "FI_RECV"
Setting FI_RECV, but not FI_SEND, indicates that the endpoint should
join the multicast group as a receive-only member. If FI_SEND is also set
or neither FI_SEND nor FI_RECV are set, then the endpoint will join the
group with send and receive capabilities.
.P
.SH "RETURN VALUE"
Returns 0 on success. On error, a negative value corresponding to fabric
errno is returned. Fabric errno values are defined in
.IR "rdma/fi_errno.h".
.SH "ERRORS"
.SH "NOTES"
.SH "SEE ALSO"
fi_getinfo(3), fi_endpoint(3), fi_domain(3), fi_eq(3)
join operation being requested.
Flags for fi_leave must be 0.
.PP
\f[I]FI_SEND\f[] : Setting FI_SEND, but not FI_RECV, indicates that the
endpoint should join the multicast group as a send-only member.
If FI_RECV is also set or neither FI_SEND nor FI_RECV are set, then the
endpoint will join the group with send and receive capabilities.
.PP
\f[I]FI_RECV\f[] : Setting FI_RECV, but not FI_SEND, indicates that the
endpoint should join the multicast group as a receive-only member.
If FI_SEND is also set or neither FI_SEND nor FI_RECV are set, then the
endpoint will join the group with send and receive capabilities.
.SH RETURN VALUE
.PP
Returns 0 on success.
On error, a negative value corresponding to fabric errno is returned.
Fabric errno values are defined in \f[C]rdma/fi_errno.h\f[].
.SH ERRORS
.SH NOTES
.SH SEE ALSO
.PP
\f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3),
\f[C]fi_eq\f[](3)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -1,202 +1,219 @@
.TH "FI_CNTR" 3 "2014-11-11" "Libfabric v0.0.2" "Libfabric Programmer's Manual" libfabric
.TH fi_cntr 3 "2014\-11\-21" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
fi_cntr \- Completion and event counter operations
.PP
fi_cntr_open / fi_close
.RS
Allocate/free a counter
.RE
fi_cntr - Completion and event counter operations
.PP
fi_cntr_read
.RS
Read the current value of a counter
.RE
fi_cntr_open / fi_close : Allocate/free a counter
.PP
fi_cntr_readerr
.RS
Reads the number of operations which have completed in error.
.RE
fi_cntr_read : Read the current value of a counter
.PP
fi_cntr_add
.RS
Increment a counter by a specified value
.RE
fi_cntr_readerr : Reads the number of operations which have completed in
error.
.PP
fi_cntr_set
.RS
Set a counter to a specified value
.RE
fi_cntr_add : Increment a counter by a specified value
.PP
fi_cntr_wait
.RS
Wait for a counter to be greater or equal to a threshold value
.RE
fi_cntr_set : Set a counter to a specified value
.PP
fi_cntr_wait : Wait for a counter to be greater or equal to a threshold
value
.SH SYNOPSIS
.B #include <rdma/fi_domain.h>
.HP
.BI "int fi_cntr_open(struct fid_domain *" domain ", struct fi_cntr_attr *" attr ", "
.BI "struct fid_cntr **" cntr ", void * " context ");"
.HP
.BI "int fi_close(struct fid *" cntr ");"
.PP
.HP
.BI "uint64_t fi_cntr_read(struct fid_cntr *" cntr ");"
.HP
.BI "uint64_t fi_cntr_readerr(struct fid_cntr *" cntr ");"
.HP
.BI "int fi_cntr_add(struct fid_cntr *" cntr ", uint64_t " value ");"
.HP
.BI "int fi_cntr_set(struct fid_cntr *" cntr ", uint64_t " value ");"
.PP
.HP
.BI "int fi_cntr_wait(struct fid_cntr *" cntr ", uint64_t " threshold ", "
.BI "int " timeout ");"
.IP
.nf
\f[C]
#include\ <rdma/fi_domain.h>
int\ fi_cntr_open(struct\ fid_domain\ *domain,\ struct\ fi_cntr_attr\ *attr,
\ \ \ \ struct\ fid_cntr\ **cntr,\ void\ *context);
int\ fi_close(struct\ fid\ *cntr);
uint64_t\ fi_cntr_read(struct\ fid_cntr\ *cntr);
uint64_t\ fi_cntr_readerr(struct\ fid_cntr\ *cntr);
int\ fi_cntr_add(struct\ fid_cntr\ *cntr,\ uint64_t\ value);
int\ fi_cntr_set(struct\ fid_cntr\ *cntr,\ uint64_t\ value);
int\ fi_cntr_wait(struct\ fid_cntr\ *cntr,\ uint64_t\ threshold,
\ \ \ \ int\ timeout);
\f[]
.fi
.SH ARGUMENTS
.IP "domain"
Fabric domain
.IP "cntr"
Fabric counter
.IP "attr"
Counter attributes
.IP "context"
User specified context associated with the counter
.IP "value"
Value to increment or set counter
.IP "threshold"
Value to compare counter against
.IP "timeout"
Time in milliseconds to wait. A negative value indicates infinite timeout.
.SH "DESCRIPTION"
.PP
\f[I]domain\f[] : Fabric domain
.PP
\f[I]cntr\f[] : Fabric counter
.PP
\f[I]attr\f[] : Counter attributes
.PP
\f[I]context\f[] : User specified context associated with the counter
.PP
\f[I]value\f[] : Value to increment or set counter
.PP
\f[I]threshold\f[] : Value to compare counter against
.PP
\f[I]timeout\f[] : Time in milliseconds to wait.
A negative value indicates infinite timeout.
.SH DESCRIPTION
.PP
Counters record the number of requested operations that have completed.
Counters can provide a light-weight completion mechanism by suppressing
the generation of a full completion event. They are useful for applications
that only need to know the number of requests that have completed, and not
details about each request. For example, counters may be useful for
implementing credit based flow control or tracking the number of remote
processes which have responded to a request.
the generation of a full completion event.
They are useful for applications that only need to know the number of
requests that have completed, and not details about each request.
For example, counters may be useful for implementing credit based flow
control or tracking the number of remote processes which have responded
to a request.
.PP
Counters typically only count successful completions. However, if an
operation completes in error, it may increment an associated error value.
.SS "fi_cntr_open"
fi_cntr_open allocates a new fabric counter. The properties and behavior of
the counter are defined by struct fi_cntr_attr.
.sp
Counters typically only count successful completions.
However, if an operation completes in error, it may increment an
associated error value.
.SS fi_cntr_open
.PP
fi_cntr_open allocates a new fabric counter.
The properties and behavior of the counter are defined by
\f[C]struct\ fi_cntr_attr\f[].
.IP
.nf
struct fi_cntr_attr {
enum fi_cntr_events events; /* type of events to count */
enum fi_wait_obj wait_obj; /* requested wait object */
struct fid_wait *wait_set; /* optional wait set */
uint64_t flags; /* operation flags */
\f[C]
struct\ fi_cntr_attr\ {
\ \ \ \ enum\ fi_cntr_events\ \ events;\ \ \ \ /*\ type\ of\ events\ to\ count\ */
\ \ \ \ enum\ fi_wait_obj\ \ \ \ \ wait_obj;\ \ /*\ requested\ wait\ object\ */
\ \ \ \ struct\ fid_wait\ \ \ \ \ *wait_set;\ \ /*\ optional\ wait\ set\ */
\ \ \ \ uint64_t\ \ \ \ \ \ \ \ \ \ \ \ \ flags;\ \ \ \ \ /*\ operation\ flags\ */
};
\f[]
.fi
.IP "events"
A counter captures different types of events. The specific type to be
counted is one of the following:
.RS
.IP "FI_CNTR_EVENTS_COMP"
The counter increments for every successful completion that occurs on an
associated bound endpoint. The type of completions -- sends and/or receives --
which are counted may be restricted using control flags when binding
the counter to the endpoint. Counters increment on all successful completions,
separately from whether the operation generates an entry in an event queue.
.RE
.IP "wait_obj"
Counters may be associated with a specific wait object. Wait objects allow
applications to block until the wait object is signaled, indicating that
a counter has reached a specific threshold. Users may use fi_control to
retrieve the underlying wait object associated with a counter, in order to
use it in other system calls. The following values may be used to specify
the type of wait object associated with a counter: FI_WAIT_NONE,
FI_WAIT_UNSPEC, FI_WAIT_SET, FI_WAIT_FD, and FI_WAIT_MUT_COND.
.RS
.IP "FI_WAIT_NONE"
Used to indicate that the user will not block (wait) for events on the counter.
.IP "FI_WAIT_UNSPEC"
Specifies that the user will only wait on the counter using fabric interface
calls, such as fi_cntr_readcond. In this case, the underlying provider may
select the most appropriate or highest performing wait object available,
including custom wait mechanisms. Applications that select
FI_WAIT_UNSPEC are not guaranteed to retrieve the underlying wait
.PP
\f[I]events\f[] : A counter captures different types of events.
The specific type to be counted is one of the following:
.IP \[bu] 2
\f[I]FI_CNTR_EVENTS_COMP\f[] : The counter increments for every
successful completion that occurs on an associated bound endpoint.
The type of completions -- sends and/or receives -- which are counted
may be restricted using control flags when binding the counter to the
endpoint.
Counters increment on all successful completions, separately from
whether the operation generates an entry in an event queue.
.PP
\f[I]wait_obj\f[] : Counters may be associated with a specific wait
object.
.IP "FI_WAIT_SET"
Indicates that the event counter should use a wait set object to wait
for events. If specified, the wait_set field must reference an existing
wait set object.
.IP "FI_WAIT_FD"
Indicates that the counter should use a file descriptor as its wait mechanism.
Wait objects allow applications to block until the wait object is
signaled, indicating that a counter has reached a specific threshold.
Users may use fi_control to retrieve the underlying wait object
associated with a counter, in order to use it in other system calls.
The following values may be used to specify the type of wait object
associated with a counter: FI_WAIT_NONE, FI_WAIT_UNSPEC, FI_WAIT_SET,
FI_WAIT_FD, and FI_WAIT_MUT_COND.
.IP \[bu] 2
\f[I]FI_WAIT_NONE\f[] : Used to indicate that the user will not block
(wait) for events on the counter.
.IP \[bu] 2
\f[I]FI_WAIT_UNSPEC\f[] : Specifies that the user will only wait on the
counter using fabric interface calls, such as fi_cntr_readcond.
In this case, the underlying provider may select the most appropriate or
highest performing wait object available, including custom wait
mechanisms.
Applications that select FI_WAIT_UNSPEC are not guaranteed to retrieve
the underlying wait object.
.IP \[bu] 2
\f[I]FI_WAIT_SET\f[] : Indicates that the event counter should use a
wait set object to wait for events.
If specified, the wait_set field must reference an existing wait set
object.
.IP \[bu] 2
\f[I]FI_WAIT_FD\f[] : Indicates that the counter should use a file
descriptor as its wait mechanism.
A file descriptor wait object must be usable in select, poll, and epoll
routines. However, a provider may signal an FD wait object by marking it
as readable, writable, or with an error.
.IP "FI_WAIT_MUT_COND"
Specifies that the counter should use a pthread mutex and cond variable as a
wait object.
.RE
.IP "wait_set"
If wait_obj is FI_WAIT_SET, this field references a wait object to which the
event counter should attach. When an event is added to the event counter,
the corresponding wait set will be signaled if all necessary conditions are
met. The use of a wait_set enables an optimized method of waiting for events
across multiple event counters. This field is ignored if wait_obj is not
FI_WAIT_SET.
.IP "flags"
Flags are reserved for future use, and must be set to 0.
.SS "fi_close"
routines.
However, a provider may signal an FD wait object by marking it as
readable, writable, or with an error.
.IP \[bu] 2
\f[I]FI_WAIT_MUT_COND\f[] : Specifies that the counter should use a
pthread mutex and cond variable as a wait object.
.PP
\f[I]wait_set\f[] : If wait_obj is FI_WAIT_SET, this field references a
wait object to which the event counter should attach.
When an event is added to the event counter, the corresponding wait set
will be signaled if all necessary conditions are met.
The use of a wait_set enables an optimized method of waiting for events
across multiple event counters.
This field is ignored if wait_obj is not FI_WAIT_SET.
.PP
\f[I]flags\f[] : Flags are reserved for future use, and must be set to
0.
.SS fi_close
.PP
The fi_close call releases all resources associated with a counter.
The counter must not be bound to any other resources prior to being freed.
.SS "fi_cntr_control"
The fi_cntr_control call is used to access provider or implementation specific
details of the counter. Access to the counter should be serialized
across all calls when fi_cntr_control is invoked, as it may redirect the
implementation of counter operations. The following control commands are usable
with a counter:
.IP "FI_GETOPSFLAG (uint64_t *)"
Returns the current default operational flags associated with the counter.
.IP "FI_SETOPSFLAG (uint64_t *)"
Modifies the current default operational flags associated with the counter.
.IP "FI_GETWAIT (void **)"
This command allows the user to retrieve the low-level wait object
associated with the counter. The format of the wait-object is specified during
counter creation, through the counter attributes. The fi_cntr_control arg
parameter should be an address where a pointer to the returned wait object
will be written.
.RS
.IP "FI_CNTR_WAIT_MUT_COND"
The counter wait is implemented using a pthread_mutex_t and pthread_cond_t.
FI_GETWAIT will return two pointers, a reference to pthread_mutex_t * and
pthread_cond_t *, respectively.
.RE
.SS "fi_cntr_read"
The counter must not be bound to any other resources prior to being
freed.
.SS fi_cntr_control
.PP
The fi_cntr_control call is used to access provider or implementation
specific details of the counter.
Access to the counter should be serialized across all calls when
fi_cntr_control is invoked, as it may redirect the implementation of
counter operations.
The following control commands are usable with a counter:
.PP
\f[I]FI_GETOPSFLAG (uint64_t *)\f[] : Returns the current default
operational flags associated with the counter.
.PP
\f[I]FI_SETOPSFLAG (uint64_t *)\f[] : Modifies the current default
operational flags associated with the counter.
.PP
\f[I]FI_GETWAIT (void **)\f[] : This command allows the user to retrieve
the low-level wait object associated with the counter.
The format of the wait-object is specified during counter creation,
through the counter attributes.
The fi_cntr_control arg parameter should be an address where a pointer
to the returned wait object will be written.
.PP
\f[I]FI_CNTR_WAIT_MUT_COND\f[] : The counter wait is implemented using a
pthread_mutex_t and pthread_cond_t.
FI_GETWAIT will return two pointers, a reference to pthread_mutex_t *
and pthread_cond_t *, respectively.
.SS fi_cntr_read
.PP
The fi_cntr_read call returns the current value of the counter.
.SS "fi_cntr_readerr"
The read error call returns the number of operations that completed in error
and were unable to update the counter.
.SS "fi_cntr_add"
.SS fi_cntr_readerr
.PP
The read error call returns the number of operations that completed in
error and were unable to update the counter.
.SS fi_cntr_add
.PP
This adds the user-specified value to the counter.
.SS "fi_cntr_set"
.SS fi_cntr_set
.PP
This sets the counter to the specified value.
.SS "fi_cntr_wait"
.SS fi_cntr_wait
.PP
This call may be used to wait until the counter reaches the specified
threshold, or until an error or timeout occurs.
Upon successful return from this call,
the counter will be greater than or equal to the input threshold value.
.sp
Upon successful return from this call, the counter will be greater than
or equal to the input threshold value.
.PP
If an operation associated with the counter encounters an error, it will
increment the error value associated with the counter. Any change in a
counter's error value will unblock any thread inside fi_cntr_wait.
.sp
increment the error value associated with the counter.
Any change in a counter\[aq]s error value will unblock any thread inside
fi_cntr_wait.
.PP
If the call returns due to timeout, -FI_ETIMEDOUT will be returned.
The error value associated with the counter remains unchanged.
.SH "RETURN VALUES"
Returns 0 on success. On error, a negative value corresponding to
fabric errno is returned.
.SH RETURN VALUES
.PP
fi_cntr_read / fi_cntr_readerr
.RS
Returns the current value of the counter.
.RE
Returns 0 on success.
On error, a negative value corresponding to fabric errno is returned.
.PP
Fabric errno values are defined in
.IR "rdma/fi_errno.h".
.SH "NOTES"
.SH "SEE ALSO"
fi_getinfo(3), fi_endpoint(3), fi_domain(3), fi_eq(3), fi_poll(3)
fi_cntr_read / fi_cntr_readerr : Returns the current value of the
counter.
.PP
Fabric errno values are defined in \f[C]rdma/fi_errno.h\f[].
.SH NOTES
.SH SEE ALSO
.PP
\f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3),
\f[C]fi_eq\f[](3), \f[C]fi_poll\f[](3)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -1 +0,0 @@
.so man3/fi_atomic.3

Просмотреть файл

@ -1,388 +1,421 @@
.TH "FI_CQ" 3 "2014-11-11" "Libfabric v0.0.2" "Libfabric Programmer's Manual" libfabric
.TH fi_cq 3 "2014\-11\-21" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
fi_cq \- Completion queue operations
.PP
fi_cq_open / fi_close
.RS
Open/close a completion queue
.RE
fi_cq - Completion queue operations
.PP
fi_cq_read / fi_cq_readfrom / fi_cq_readerr
.RS
Read a completion from a completion queue
.RE
fi_cq_open / fi_close : Open/close a completion queue
.PP
fi_cq_write / fi_cq_writeerr
.RS
Writes a completion or error event to a completion queue
.RE
fi_cq_read / fi_cq_readfrom / fi_cq_readerr : Read a completion from a
completion queue
.PP
fi_cq_sread / fi_cq_sreadfrom
.RS
A synchronous (blocking) read that waits until a specified condition has
been met before reading a completion from a completion queue.
.RE
fi_cq_write / fi_cq_writeerr : Writes a completion or error event to a
completion queue
.PP
fi_cq_strerror
.RS
Converts provider specific error information into a printable string
.RE
fi_cq_sread / fi_cq_sreadfrom : A synchronous (blocking) read that waits
until a specified condition has been met before reading a completion
from a completion queue.
.PP
fi_cq_strerror : Converts provider specific error information into a
printable string
.SH SYNOPSIS
.B #include <rdma/fi_domain.h>
.HP
.BI "int fi_cq_open(struct fid_domain *" domain ", struct fi_cq_attr *" attr ", "
.BI "struct fid_cq **" cq ", void * " context ");"
.HP
.BI "int fi_close(struct fid *" cq ");"
.HP
.BI "int fi_control(struct fid *" cq ", int " command ", void *" arg ");"
.PP
.HP
.BI "int fi_cq_read(struct fid_cq *" cq ","
.BI "void *" buf ", size_t " count ");"
.HP
.BI "int fi_cq_readfrom(struct fid_cq *" cq ","
.BI "void *" buf ", size_t " count ", "
.BI "fi_addr_t *" src_addr ");"
.HP
.BI "int fi_cq_readerr(struct fid_cq *" cq ","
.BI "struct fi_cq_err_entry *" buf ", size_t " len ", "
.BI "uint64_t " flags ");"
.PP
.HP
.BI "int fi_cq_write(struct fid_cq *" cq ","
.BI "const void *" buf ", size_t " len ");"
.HP
.BI "int fi_cq_writeerr(struct fid_cq *" cq ","
.BI "struct fi_cq_err_entry *" buf ", size_t " len ", "
.BI "uint64_t " flags ");"
.PP
.HP
.BI "int fi_cq_sread(struct fid_cq *" cq ","
.BI "void *" buf ", size_t " count ", "
.BI "const void *" cond ", int " timeout ");"
.HP
.BI "int fi_cq_sreadfrom(struct fid_cq *" cq ","
.BI "void *" buf ", size_t " count ","
.BI "fi_addr_t *" src_addr ", const void *" cond ", int " timeout ");"
.PP
.HP
.BI "const char * fi_cq_strerror(struct fid_cq *" cq ", int " prov_errno ", "
.BI "const void *" err_data ", void *" buf ", size_t" len ");"
.IP
.nf
\f[C]
#include\ <rdma/fi_domain.h>
int\ fi_cq_open(struct\ fid_domain\ *domain,\ struct\ fi_cq_attr\ *attr,
\ \ \ \ struct\ fid_cq\ **cq,\ void\ *context);
int\ fi_close(struct\ fid\ *cq);
int\ fi_control(struct\ fid\ *cq,\ int\ command,\ void\ *arg);
int\ fi_cq_read(struct\ fid_cq\ *cq,\ void\ *buf,\ size_t\ count);
int\ fi_cq_readfrom(struct\ fid_cq\ *cq,\ void\ *buf,\ size_t\ count,
\ \ \ \ fi_addr_t\ *src_addr);
int\ fi_cq_readerr(struct\ fid_cq\ *cq,\ struct\ fi_cq_err_entry\ *buf,
\ \ \ \ size_t\ len,\ uint64_t\ flags);
int\ fi_cq_write(struct\ fid_cq\ *cq,\ const\ void\ *buf,\ size_t\ len);
int\ fi_cq_writeerr(struct\ fid_cq\ *cq,\ struct\ fi_cq_err_entry\ *buf,
\ \ \ \ size_t\ len,\ uint64_t\ flags);
int\ fi_cq_sread(struct\ fid_cq\ *cq,\ void\ *buf,\ size_t\ count,
\ \ \ \ const\ void\ *cond,\ int\ timeout);
int\ fi_cq_sreadfrom(struct\ fid_cq\ *cq,\ void\ *buf,\ size_t\ count,
\ \ \ \ fi_addr_t\ *src_addr,\ const\ void\ *cond,\ int\ timeout);
const\ char\ *\ fi_cq_strerror(struct\ fid_cq\ *cq,\ int\ prov_errno,
\ \ \ \ \ \ const\ void\ *err_data,\ char\ *buf,\ size_t\ len);
\f[]
.fi
.SH ARGUMENTS
.IP "domain"
Open resource domain
.IP "cq"
Completion queue
.IP "attr"
Completion queue attributes
.IP "context"
User specified context associated with the completion queue.
.IP "buf"
For read calls, the data buffer to write completions into.
.br
.PP
\f[I]domain\f[] : Open resource domain
.PP
\f[I]cq\f[] : Completion queue
.PP
\f[I]attr\f[] : Completion queue attributes
.PP
\f[I]context\f[] : User specified context associated with the completion
queue.
.PP
\f[I]buf\f[] : For read calls, the data buffer to write completions
into.
For write calls, a completion to insert into the completion queue.
.br
For fi_cq_strerror, an optional buffer that receives printable error information.
.IP "count"
Number of CQ entries.
.IP "len"
Length of data buffer
.IP "src_addr"
Source address of a completed receive operation
.IP "flags"
Additional flags to apply to the operation
.IP "cond"
Condition that must be met before a completion is generated
.IP "timeout"
Time in milliseconds to wait. A negative value indicates infinite timeout.
.IP "prov_errno"
Provider specific error value
.IP "err_data"
Provider specific error data related to a completion
.SH "DESCRIPTION"
Completion queues are used to report events associated with data transfers.
For fi_cq_strerror, an optional buffer that receives printable error
information.
.PP
\f[I]count\f[] : Number of CQ entries.
.PP
\f[I]len\f[] : Length of data buffer
.PP
\f[I]src_addr\f[] : Source address of a completed receive operation
.PP
\f[I]flags\f[] : Additional flags to apply to the operation
.PP
\f[I]cond\f[] : Condition that must be met before a completion is
generated
.PP
\f[I]timeout\f[] : Time in milliseconds to wait.
A negative value indicates infinite timeout.
.PP
\f[I]prov_errno\f[] : Provider specific error value
.PP
\f[I]err_data\f[] : Provider specific error data related to a completion
.SH DESCRIPTION
.PP
Completion queues are used to report events associated with data
transfers.
They are associated with message sends and receives, RMA, atomic, tagged
messages, and triggered events. Reported events are
usually associated with a fabric endpoint, but may also refer to memory
regions used as the target of an RMA or atomic operation.
.SS "fi_cq_open"
fi_cq_open allocates a new completion queue. Unlike event queues, completion
queues are associated with a resource domain and may be offloaded entirely
in provider hardware.
.PP
messages, and triggered events.
Reported events are usually associated with a fabric endpoint, but may
also refer to memory regions used as the target of an RMA or atomic
operation.
.SS fi_cq_open
.PP
fi_cq_open allocates a new completion queue.
Unlike event queues, completion queues are associated with a resource
domain and may be offloaded entirely in provider hardware.
.PP
The properties and behavior of a completion queue are defined by
struct fi_cq_attr.
\f[C]struct\ fi_cq_attr\f[].
.IP
.nf
struct fi_cq_attr {
size_t size; /* # entries for CQ */
uint64_t flags; /* operation flags */
enum fi_cq_format format; /* completion format */
enum fi_wait_obj wait_obj; /* requested wait object */
int signaling_vector; /* interrupt affinity */
enum fi_cq_wait_cond wait_cond; /* wait condition format */
struct fid_wait *wait_set; /* optional wait set */
\f[C]
struct\ fi_cq_attr\ {
\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ size;\ \ \ \ \ \ /*\ #\ entries\ for\ CQ\ */
\ \ \ \ uint64_t\ \ \ \ \ \ \ \ \ \ \ \ \ flags;\ \ \ \ \ /*\ operation\ flags\ */
\ \ \ \ enum\ fi_cq_format\ \ \ \ format;\ \ \ \ /*\ completion\ format\ */
\ \ \ \ enum\ fi_wait_obj\ \ \ \ \ wait_obj;\ \ /*\ requested\ wait\ object\ */
\ \ \ \ int\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ signaling_vector;\ /*\ interrupt\ affinity\ */
\ \ \ \ enum\ fi_cq_wait_cond\ wait_cond;\ /*\ wait\ condition\ format\ */
\ \ \ \ struct\ fid_wait\ \ \ \ \ *wait_set;\ \ /*\ optional\ wait\ set\ */
};
\f[]
.fi
.IP "size"
Specifies the minimum size of an event queue. A value of 0 indicates that
the provider may choose a default value.
.IP "flags"
Flags that control the configuration of the CQ.
.RS
.IP "FI_WRITE"
Indicates that the application requires support for inserting user events
into the CQ. If this flag is set, then the fi_cq_write and fi_cq_writeerr
operations must be
supported by the provider. If the FI_WRITE flag is not set, then the
application may not invoke fi_cq_write or fi_cq_writeerr.
.IP "FI_REMOTE_SIGNAL"
If specified, this indicates that the CQ should only signal its wait object
upon receiving a remote operation with FI_REMOTE_SIGNAL set, provided that all
other wait conditions have been met. The use of FI_REMOTE_SIGNAL may
improve system utilization by deferring processing of an CQ until a remote
endpoint has completed all necessary operations. FI_REMOTE_SIGNAL should be
treated as an optimization. Providers are not required to wait until a
remote operation with FI_REMOTE_SIGNAL is received before signaling a wait
object associated with an CQ.
.RE
.IP "format"
Completion queues allow the application to select the amount of detail that it
must store and report. The format attribute allows the application to
select one of several completion formats, indicating the structure of the data
that the completion queue should return when read. Supported formats and the
structures that correspond to each are listed below.
.RS
.IP "FI_CQ_FORMAT_UNSPEC"
If an unspecified format is requested, then the CQ will use a provider
selected default format.
.IP "FI_CQ_FORMAT_CONTEXT"
Provides only user specified context that was associated with the completion.
.PP
\f[I]size\f[] : Specifies the minimum size of an event queue.
A value of 0 indicates that the provider may choose a default value.
.PP
\f[I]flags\f[] : Flags that control the configuration of the CQ.
.IP \[bu] 2
\f[I]FI_WRITE\f[] : Indicates that the application requires support for
inserting user events into the CQ.
If this flag is set, then the fi_cq_write and fi_cq_writeerr operations
must be supported by the provider.
If the FI_WRITE flag is not set, then the application may not invoke
fi_cq_write or fi_cq_writeerr.
.IP \[bu] 2
\f[I]FI_REMOTE_SIGNAL\f[] : If specified, this indicates that the CQ
should only signal its wait object upon receiving a remote operation
with FI_REMOTE_SIGNAL set, provided that all other wait conditions have
been met.
The use of FI_REMOTE_SIGNAL may improve system utilization by deferring
processing of an CQ until a remote endpoint has completed all necessary
operations.
FI_REMOTE_SIGNAL should be treated as an optimization.
Providers are not required to wait until a remote operation with
FI_REMOTE_SIGNAL is received before signaling a wait object associated
with an CQ.
.PP
\f[I]format\f[] : Completion queues allow the application to select the
amount of detail that it must store and report.
The format attribute allows the application to select one of several
completion formats, indicating the structure of the data that the
completion queue should return when read.
Supported formats and the structures that correspond to each are listed
below.
.IP \[bu] 2
\f[I]FI_CQ_FORMAT_UNSPEC\f[] : If an unspecified format is requested,
then the CQ will use a provider selected default format.
.IP \[bu] 2
\f[I]FI_CQ_FORMAT_CONTEXT\f[] : Provides only user specified context
that was associated with the completion.
.IP
.nf
struct fi_cq_entry {
void *op_context; /* operation context */
\f[C]
struct\ fi_cq_entry\ {
\ \ \ \ void\ \ \ \ \ *op_context;\ /*\ operation\ context\ */
};
\f[]
.fi
.IP "FI_CQ_FORMAT_MSG"
Provides minimal data for processing completions, with expanded support
for reporting information about received messages.
.IP \[bu] 2
\f[I]FI_CQ_FORMAT_MSG\f[] : Provides minimal data for processing
completions, with expanded support for reporting information about
received messages.
.IP
.nf
struct fi_cq_msg_entry {
void *op_context; /* operation context */
uint64_t flags; /* completion flags */
size_t len; /* size of received data */
\f[C]
struct\ fi_cq_msg_entry\ {
\ \ \ \ void\ \ \ \ \ *op_context;\ /*\ operation\ context\ */
\ \ \ \ uint64_t\ flags;\ \ \ \ \ \ \ /*\ completion\ flags\ */
\ \ \ \ size_t\ \ \ len;\ \ \ \ \ \ \ \ \ /*\ size\ of\ received\ data\ */
};
\f[]
.fi
.IP "FI_CQ_FORMAT_DATA"
Provides data associated with a completion. Includes support for received
message length, remote EQ data, and multi-receive buffers.
.IP \[bu] 2
\f[I]FI_CQ_FORMAT_DATA\f[] : Provides data associated with a completion.
Includes support for received message length, remote EQ data, and
multi-receive buffers.
.IP
.nf
struct fi_cq_data_entry {
void *op_context; /* operation context */
uint64_t flags; /* completion flags */
size_t len; /* size of received data */
void *buf; /* receive data buffer */
uint64_t data; /* completion data */
\f[C]
struct\ fi_cq_data_entry\ {
\ \ \ \ void\ \ \ \ \ *op_context;\ /*\ operation\ context\ */
\ \ \ \ uint64_t\ flags;\ \ \ \ \ \ \ /*\ completion\ flags\ */
\ \ \ \ size_t\ \ \ len;\ \ \ \ \ \ \ \ \ /*\ size\ of\ received\ data\ */
\ \ \ \ void\ \ \ \ \ *buf;\ \ \ \ \ \ \ \ /*\ receive\ data\ buffer\ */
\ \ \ \ uint64_t\ data;\ \ \ \ \ \ \ \ /*\ completion\ data\ */
};
\f[]
.fi
.IP "FI_CQ_FORMAT_TAGGED"
Expands completion data to include support for the tagged message interfaces.
.IP \[bu] 2
\f[I]FI_CQ_FORMAT_TAGGED\f[] : Expands completion data to include
support for the tagged message interfaces.
.IP
.nf
struct fi_cq_tagged_entry {
void *op_context; /* operation context */
uint64_t flags; /* completion flags */
size_t len; /* size of received data */
void *buf; /* receive data buffer */
uint64_t data; /* completion data */
uint64_t tag; /* received tag */
\f[C]
struct\ fi_cq_tagged_entry\ {
\ \ \ \ void\ \ \ \ \ *op_context;\ /*\ operation\ context\ */
\ \ \ \ uint64_t\ flags;\ \ \ \ \ \ \ /*\ completion\ flags\ */
\ \ \ \ size_t\ \ \ len;\ \ \ \ \ \ \ \ \ /*\ size\ of\ received\ data\ */
\ \ \ \ void\ \ \ \ \ *buf;\ \ \ \ \ \ \ \ /*\ receive\ data\ buffer\ */
\ \ \ \ uint64_t\ data;\ \ \ \ \ \ \ \ /*\ completion\ data\ */
\ \ \ \ uint64_t\ tag;\ \ \ \ \ \ \ \ \ /*\ received\ tag\ */
};
\f[]
.fi
.IP "wait_obj"
CQ's may be associated with a specific wait object. Wait objects allow
applications to block until the wait object is signaled, indicating that
a completion is available to be read. Users may use fi_control to retrieve
the underlying wait object associated with an CQ, in order to use it in
other system calls. The following values may be used to specify the type
of wait object associated with an CQ: FI_WAIT_NONE, FI_WAIT_UNSPEC,
FI_WAIT_SET, FI_WAIT_FD, and FI_WAIT_MUT_COND.
.RS
.IP "FI_WAIT_NONE"
Used to indicate that the user will not block (wait) for completions on the CQ.
.PP
\f[I]wait_obj\f[] : CQ\[aq]s may be associated with a specific wait
object.
Wait objects allow applications to block until the wait object is
signaled, indicating that a completion is available to be read.
Users may use fi_control to retrieve the underlying wait object
associated with an CQ, in order to use it in other system calls.
The following values may be used to specify the type of wait object
associated with an CQ: FI_WAIT_NONE, FI_WAIT_UNSPEC, FI_WAIT_SET,
FI_WAIT_FD, and FI_WAIT_MUT_COND.
.IP \[bu] 2
\f[I]FI_WAIT_NONE\f[] : Used to indicate that the user will not block
(wait) for completions on the CQ.
When FI_WAIT_NONE is specified, the application may not call fi_cq_sread
or fi_cq_sreadfrom.
.IP "FI_WAIT_UNSPEC"
Specifies that the user will only wait on the CQ using fabric interface
calls, such as fi_cq_readcond or fi_cq_sreadfrom. In this case, the
underlying provider may select the most appropriate or highest
performing wait object available, including custom wait mechanisms.
Applications that select
FI_WAIT_UNSPEC are not guaranteed to retrieve the underlying wait
.IP \[bu] 2
\f[I]FI_WAIT_UNSPEC\f[] : Specifies that the user will only wait on the
CQ using fabric interface calls, such as fi_cq_readcond or
fi_cq_sreadfrom.
In this case, the underlying provider may select the most appropriate or
highest performing wait object available, including custom wait
mechanisms.
Applications that select FI_WAIT_UNSPEC are not guaranteed to retrieve
the underlying wait object.
.IP \[bu] 2
\f[I]FI_WAIT_SET\f[] : Indicates that the completion queue should use a
wait set object to wait for completions.
If specified, the wait_set field must reference an existing wait set
object.
.IP "FI_WAIT_SET"
Indicates that the completion queue should use a wait set object to wait
for completions. If specified, the wait_set field must reference an existing
wait set object.
.IP "FI_WAIT_FD"
Indicates that the CQ should use a file descriptor as its wait mechanism.
.IP \[bu] 2
\f[I]FI_WAIT_FD\f[] : Indicates that the CQ should use a file descriptor
as its wait mechanism.
A file descriptor wait object must be usable in select, poll, and epoll
routines. However, a provider may signal an FD wait object by marking it
as readable, writable, or with an error.
.IP "FI_WAIT_MUT_COND"
Specifies that the CQ should use a pthread mutex and cond variable as a
wait object.
.RE
.IP "signaling_vector"
Indicates which processor core interrupts associated with the EQ should
target.
.IP "wait_cond"
By default, when a completion is inserted into an CQ that supports blocking
reads (fi_cq_sread/fi_cq_sreadfrom), the corresponding wait
object is signaled. Users may specify a condition that must
first be met before the wait is satisfied. This field indicates how the
provider should interpret the cond field, which describes the condition
needed to signal the wait object.
.sp
A wait condition should be treated as an optimization. Providers are
not required to meet the requirements of the condition before signaling
the wait object. Applications should not rely on the condition
necessarily being true when a blocking read call returns.
.sp
routines.
However, a provider may signal an FD wait object by marking it as
readable, writable, or with an error.
.IP \[bu] 2
\f[I]FI_WAIT_MUT_COND\f[] : Specifies that the CQ should use a pthread
mutex and cond variable as a wait object.
.PP
\f[I]signaling_vector\f[] : Indicates which processor core interrupts
associated with the EQ should target.
.PP
\f[I]wait_cond\f[] : By default, when a completion is inserted into an
CQ that supports blocking reads (fi_cq_sread/fi_cq_sreadfrom), the
corresponding wait object is signaled.
Users may specify a condition that must first be met before the wait is
satisfied.
This field indicates how the provider should interpret the cond field,
which describes the condition needed to signal the wait object.
.PP
A wait condition should be treated as an optimization.
Providers are not required to meet the requirements of the condition
before signaling the wait object.
Applications should not rely on the condition necessarily being true
when a blocking read call returns.
.PP
If wait_cond is set to FI_CQ_COND_NONE, then no additional conditions
are applied to the signaling of the CQ wait object, and the insertion of
any new entry will trigger the wait condition. If wait_cond is
set to FI_CQ_COND_THRESHOLD, then the cond field is interpreted as a size_t
threshold value. The threshold indicates the number of entries that are
to be queued at the CQ before the wait is satisfied.
.sp
any new entry will trigger the wait condition.
If wait_cond is set to FI_CQ_COND_THRESHOLD, then the cond field is
interpreted as a size_t threshold value.
The threshold indicates the number of entries that are to be queued
at the CQ before the wait is satisfied.
.PP
This field is ignored if wait_obj is set to FI_WAIT_NONE.
.IP "wait_set"
If wait_obj is FI_WAIT_SET, this field references a wait object to which the
completion queue should attach. When an event is inserted into the completion queue,
the corresponding wait set will be signaled if all necessary conditions are
met. The use of a wait_set enables an optimized method of waiting for events
across multiple event and completion queues. This field is ignored if wait_obj
is not FI_WAIT_SET.
.SS "fi_close"
.PP
\f[I]wait_set\f[] : If wait_obj is FI_WAIT_SET, this field references a
wait object to which the completion queue should attach.
When an event is inserted into the completion queue, the corresponding
wait set will be signaled if all necessary conditions are met.
The use of a wait_set enables an optimized method of waiting for events
across multiple event and completion queues.
This field is ignored if wait_obj is not FI_WAIT_SET.
.SS fi_close
.PP
The fi_close call releases all resources associated with a completion
queue. The CQ must not be bound to any other resources prior to
being closed. Any completions which remain on the CQ when it is closed are
lost.
.SS "fi_control"
The fi_control call is used to access provider or implementation specific
details of the completion queue. Access to the CQ should be serialized
across all calls when fi_control is invoked, as it may redirect the
implementation of CQ operations. The following control commands are usable
with an CQ.
.IP "FI_GETWAIT (void **)"
This command allows the user to retrieve the low-level wait object
associated with the CQ. The format of the wait-object is specified during
CQ creation, through the CQ attributes. The fi_control arg parameter
should be an address where a pointer to the returned wait object
will be written.
.SS "fi_cq_read / fi_cq_readfrom"
The fi_cq_read and fi_cq_readfrom operations perform a non-blocking read of
completion data from the CQ. The format of the completion event
is determined using the fi_cq_format option that was specified when
the CQ was opened. Multiple completions may be retrieved
from a CQ in a single call. The maximum number of
entries to return is limited to the
specified count parameter, with the number of entries successfully read from
the CQ returned by the call.
queue.
The CQ must not be bound to any other resources prior to being closed.
Any completions which remain on the CQ when it is closed are lost.
.SS fi_control
.PP
The fi_cq_readfrom call allows the CQ to return source address information to
the user for any received data. Source address data is only available for
those endpoints configured with FI_SOURCE capability. If fi_cq_readfrom is
called on an endpoint for which source addressing data is not available, the
source address will be set to FI_ADDR_NOTAVAIL. The number of input src_addr
entries must be the same as the count parameter.
The fi_control call is used to access provider or implementation
specific details of the completion queue.
Access to the CQ should be serialized across all calls when fi_control
is invoked, as it may redirect the implementation of CQ operations.
The following control commands are usable with an CQ.
.PP
*FI_GETWAIT (void *\f[I])\f[] : This command allows the user to retrieve
the low-level wait object associated with the CQ.
The format of the wait-object is specified during CQ creation, through
the CQ attributes.
The fi_control arg parameter should be an address where a pointer to the
returned wait object will be written.
.SS fi_cq_read / fi_cq_readfrom
.PP
The fi_cq_read and fi_cq_readfrom operations perform a non-blocking read
of completion data from the CQ.
The format of the completion event is determined using the fi_cq_format
option that was specified when the CQ was opened.
Multiple completions may be retrieved from a CQ in a single call.
The maximum number of entries to return is limited to the specified
count parameter, with the number of entries successfully read from the
CQ returned by the call.
.PP
The fi_cq_readfrom call allows the CQ to return source address
information to the user for any received data.
Source address data is only available for those endpoints configured
with FI_SOURCE capability.
If fi_cq_readfrom is called on an endpoint for which source addressing
data is not available, the source address will be set to
FI_ADDR_NOTAVAIL.
The number of input src_addr entries must be the same as the count
parameter.
.PP
CQs are optimized to report operations which have completed
successfully.
Operations which fail are reported \[aq]out of band\[aq].
Such operations are retrieved using the fi_cq_readerr function.
When an operation that completes with an unexpected error is inserted
into an CQ, it is placed into a temporary error queue.
Attempting to read from an CQ while an item is in the error queue
results in an FI_EAVAIL failure.
Applications may use this return code to determine when to call
fi_cq_readerr.
.SS fi_cq_sread / fi_cq_sreadfrom
.PP
CQs are optimized to report operations which have completed successfully.
Operations which fail are reported 'out of band'. Such operations are
retrieved using the fi_cq_readerr function. When an operation
that completes with an unexpected error is inserted
into an CQ, it is placed into a temporary error queue. Attempting to read
from an CQ while an item is in the error queue results in an FI_EAVAIL
failure. Applications may use this return code to determine when to
call fi_cq_readerr.
.SS "fi_cq_sread / fi_cq_sreadfrom"
The fi_cq_sread and fi_cq_sreadfrom calls are the blocking equivalent
operations to fi_cq_read and fi_cq_readfrom. Their behavior is similar to
the non-blocking calls, with the exception that the calls will not return
until either a completion has been read from the CQ or an error or timeout occurs.
.SS "fi_cq_readerr"
operations to fi_cq_read and fi_cq_readfrom.
Their behavior is similar to the non-blocking calls, with the exception
that the calls will not return until either a completion has been read
from the CQ or an error or timeout occurs.
.SS fi_cq_readerr
.PP
The read error function, fi_cq_readerr, retrieves information regarding
any asynchronous operation which has completed with an unexpected error.
fi_cq_readerr is a non-blocking call, returning immediately whether an
error completion was found or not.
.PP
Error information is reported to the user through struct fi_cq_err_entry.
Error information is reported to the user through
\f[C]struct\ fi_cq_err_entry\f[].
The format of this structure is defined below.
.IP
.nf
struct fi_cq_err_entry {
void *op_context; /* operation context */
uint64_t flags; /* completion flags */
size_t len; /* size of received data */
void *buf; /* receive data buffer */
uint64_t data; /* completion data */
uint64_t tag; /* message tag */
size_t olen; /* overflow length */
int err; /* positive error code */
int prov_errno; /* provider error code */
void *err_data; /* error data */
\f[C]
struct\ fi_cq_err_entry\ {
\ \ \ \ void\ \ \ \ \ *op_context;\ /*\ operation\ context\ */
\ \ \ \ uint64_t\ flags;\ \ \ \ \ \ \ /*\ completion\ flags\ */
\ \ \ \ size_t\ \ \ len;\ \ \ \ \ \ \ \ \ /*\ size\ of\ received\ data\ */
\ \ \ \ void\ \ \ \ \ *buf;\ \ \ \ \ \ \ \ /*\ receive\ data\ buffer\ */
\ \ \ \ uint64_t\ data;\ \ \ \ \ \ \ \ /*\ completion\ data\ */
\ \ \ \ uint64_t\ tag;\ \ \ \ \ \ \ \ \ /*\ message\ tag\ */
\ \ \ \ size_t\ \ \ olen;\ \ \ \ \ \ \ \ /*\ overflow\ length\ */
\ \ \ \ int\ \ \ \ \ \ err;\ \ \ \ \ \ \ \ \ /*\ positive\ error\ code\ */
\ \ \ \ int\ \ \ \ \ \ prov_errno;\ \ /*\ provider\ error\ code\ */
\ \ \ \ void\ \ \ \ *err_data;\ \ \ \ /*\ \ error\ data\ */
};
\f[]
.fi
The general reason for the error is provided through the err field. Provider
specific error information may also be available through the prov_errno
and err_data fields. Users may call fi_cq_strerror to convert provider
specific error information into a printable string for debugging purposes.
.SS "fi_cq_write / fi_cq_writeerr"
The fi_cq_write and fi_cq_writeerr operations insert user-generated completion
entries into a completion queue. fi_cq_write inserts non-error events into
the CQ. The format of the fi_cq_write event must be the same as the
fi_cq_format attribute defined for the CQ when it was created. fi_cq_writeerr
inserts error events into the CQ. The error event format is struct
fi_cq_err_entry. The number of entries to insert into the CQ is determined
by the len parameter. Len must be a multiple of the size of the event to
insert.
.PP
User events inserted into a CQ will be associated with the source address
FI_ADDR_NOTAVAIL.
.SH "RETURN VALUES"
fi_cq_open
.RS
Returns 0 on success. On error, a negative value corresponding to
fabric errno is returned.
.RE
The general reason for the error is provided through the err field.
Provider specific error information may also be available through the
prov_errno and err_data fields.
The err_data field, if set, will reference an internal buffer owned by
the provider.
The contents of the buffer will remain valid until a subsequent read
call against the CQ.
Users may call fi_cq_strerror to convert provider specific error
information into a printable string for debugging purposes.
.SS fi_cq_write / fi_cq_writeerr
.PP
fi_cq_read / fi_cq_readfrom / fi_cq_readerr
.br
fi_cq_sread / fi_cq_sreadfrom
.RS
On success, returns the number of completion events retrieved from the
completion queue. On error, a negative value corresponding to fabric
errno is returned. On timeout, -FI_ETIMEDOUT is returned.
.RE
fi_cq_write / fi_cq_writeerr
.RS
On success, returns the number of bytes read from or written to the completion
queue. On error, a negative value corresponding to fabric errno
is returned.
.RE
The fi_cq_write and fi_cq_writeerr operations insert user-generated
completion entries into a completion queue.
fi_cq_write inserts non-error events into the CQ.
The format of the fi_cq_write event must be the same as the fi_cq_format
attribute defined for the CQ when it was created.
fi_cq_writeerr inserts error events into the CQ.
The error event format is struct fi_cq_err_entry.
The number of entries to insert into the CQ is determined by the len
parameter.
Len must be a multiple of the size of the event to insert.
.PP
fi_cq_strerror
.RS
Returns a character string interpretation of the provider specific error
returned with a completion.
.RE
User events inserted into a CQ will be associated with the source
address FI_ADDR_NOTAVAIL.
.SH RETURN VALUES
.PP
Fabric errno values are defined in
.IR "rdma/fi_errno.h".
.SH "SEE ALSO"
fi_getinfo(3), fi_endpoint(3), fi_domain(3), fi_eq(3), fi_cntr(3), fi_poll(3)
fi_cq_open : Returns 0 on success.
On error, a negative value corresponding to fabric errno is returned.
.PP
fi_cq_read / fi_cq_readfrom / fi_cq_readerr fi_cq_sread /
fi_cq_sreadfrom : On success, returns the number of completion events
retrieved from the completion queue.
On error, a negative value corresponding to fabric errno is returned.
On timeout, -FI_ETIMEDOUT is returned.
.PP
fi_cq_write / fi_cq_writeerr : On success, returns the number of bytes
read from or written to the completion queue.
On error, a negative value corresponding to fabric errno is returned.
.PP
fi_cq_strerror : Returns a character string interpretation of the
provider specific error returned with a completion.
.PP
Fabric errno values are defined in \f[C]rdma/fi_errno.h\f[].
.SH SEE ALSO
.PP
\f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3),
\f[C]fi_eq\f[](3), \f[C]fi_cntr\f[](3), \f[C]fi_poll\f[](3)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -1,60 +1,79 @@
.TH "FI_DIRECT" 7 "2014-11-11" "Libfabric v0.0.2" "Libfabric Programmer's Manual" libfabric
.TH fi_direct 7 "2014\-11\-21" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
Direct fabric provider access
.SH SYNOPSIS
.B -DFABRIC_DIRECT
.IP
.nf
\f[C]
-DFABRIC_DIRECT
#define\ FABRIC_DIRECT
\f[]
.fi
.PP
.B #define FABRIC_DIRECT
.PP
Fabric direct provides a mechanism for applications to compile against
a specific fabric provider without going through the libfabric
framework or function vector tables. This allows for extreme optimization
via function inlining at the cost of supporting multiple providers or
different versions of the same provider.
Fabric direct provides a mechanism for applications to compile against a
specific fabric provider without going through the libfabric framework
or function vector tables.
This allows for extreme optimization via function inlining at the cost
of supporting multiple providers or different versions of the same
provider.
.SH DESCRIPTION
.PP
The use of fabric direct is intended only for applications that require
the absolute minimum software latency, and are willing to re-compile
for specific fabric hardware. Providers that support fabric direct
implement their own versions of the static inline calls which are defined
in the libfabric header files, define selected enum values, and provide
defines for compile-time optimizations. Applications can then code against
the standard libfabric calls, but link directly against the provider calls by
defining FABRIC_DIRECT as part of their build.
the absolute minimum software latency, and are willing to re-compile for
specific fabric hardware.
Providers that support fabric direct implement their own versions of the
static inline calls which are defined in the libfabric header files,
define selected enum values, and provide defines for compile-time
optimizations.
Applications can then code against the standard libfabric calls, but
link directly against the provider calls by defining FABRIC_DIRECT as
part of their build.
.PP
In general, the use of fabric direct does not require application source
code changes, and, instead, is limited to the build process.
.PP
Providers supporting fabric direct must install 'direct' versions of all
libfabric header files. For convenience, the libfabric sources contain
sample header files that may be modified by a provider. The 'direct'
header file names have 'fi_direct' as their prefix: fi_direct.h,
fi_direct_endpoint.h, etc.
Providers supporting fabric direct must install \[aq]direct\[aq]
versions of all libfabric header files.
For convenience, the libfabric sources contain sample header files that
may be modified by a provider.
The \[aq]direct\[aq] header file names have \[aq]fi_direct\[aq] as their
prefix: fi_direct.h, fi_direct_endpoint.h, etc.
.PP
Direct providers are prohibited from overriding or modifying existing
data structures. However, provider specific extensions are still available.
data structures.
However, provider specific extensions are still available.
In addition to provider direct function calls to provider code, a fabric
direct provider may define zero or more of the following capability
definitions. Applications can check for these capabilities in order to
optimize code paths at compile time, versus relying on run-time checks.
.SH "CAPABILITY DEFINITIONS"
In order that application code may be optimized during compile time, direct
providers must provide definitions for various capabilities and modes, if those
capabilities are supported. The following #define values may be used
by an application to test for provider support of supported features.
.IP "FI_DIRECT_WRITE_NONCOHERENT"
The provider sets FI_WRITE_NONCOHERENT for fi_info:mode. See fi_getinfo
for additional details.
.IP "FI_DIRECT_CONTEXT"
The provider sets FI_CONTEXT for fi_info:mode. See fi_getinfo
for additional details.
.IP "FI_DIRECT_LOCAL_MR"
The provider sets FI_LOCAL_MR for fi_info:mode. See fi_getinfo
for additional details.
.IP "FI_DIRECT_PROV_MR_KEY"
The provider sets FI_PROV_MR_KEY for fi_info:mode. See fi_getinfo
for additional details.
.IP "FI_DIRECT_DYNAMIC_MR"
The provider sets FI_DYNAMIC_MR for fi_info:caps. See fi_getinfo
for additional details.
.SH "SEE ALSO"
fi_getinfo(3), fi_endpoint(3), fi_domain(3)
definitions.
Applications can check for these capabilities in order to optimize code
paths at compile time, versus relying on run-time checks.
.SH CAPABILITY DEFINITIONS
.PP
In order that application code may be optimized during compile time,
direct providers must provide definitions for various capabilities and
modes, if those capabilities are supported.
The following #define values may be used by an application to test for
provider support of supported features.
.PP
\f[I]FI_DIRECT_CONTEXT\f[] : The provider sets FI_CONTEXT for
fi_info:mode.
See fi_getinfo for additional details.
.PP
\f[I]FI_DIRECT_LOCAL_MR\f[] : The provider sets FI_LOCAL_MR for
fi_info:mode.
See fi_getinfo for additional details.
.PP
\f[I]FI_DIRECT_PROV_MR_ATTR\f[] : The provider sets FI_PROV_MR_ATTR for
fi_info:mode.
See fi_getinfo for additional details.
.PP
\f[I]FI_DIRECT_DYNAMIC_MR\f[] : The provider sets FI_DYNAMIC_MR for
fi_info:caps.
See fi_getinfo for additional details.
.SH SEE ALSO
.PP
\f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -1,243 +1,296 @@
.TH "FI_DOMAIN" 3 "2014-11-11" "Libfabric v0.0.2" "Libfabric Programmer's Manual" libfabric
.TH fi_domain 3 "2014\-11\-21" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
fi_domain \- Open a fabric access domain
.SH SYNOPSIS
.B "#include <rdma/fabric.h>"
.br
.B "#include <rdma/fi_domain.h>"
.HP
.BI "int fi_domain(struct fid_fabric *" fabric ", struct fi_info *" info ", "
.BI "struct fid_domain **" domain ", void *" context ");"
.HP
.BI "int fi_close(struct fid *" domain ");"
.PP
.HP
.BI "int fi_domain_bind(struct fid_domain *" domain ", struct fid *" eq ", "
.BI "uint64_t " flags ");"
.HP
.BI "int fi_open_ops(struct fid *" domain ", const char *" name ", uint64_t " flags ","
.BI "void **" ops ", void *" context ");"
.SH ARGUMENTS
.IP "fabric" 12
Fabric domain
.IP "info" 12
Fabric information, including domain capabilities and attributes.
.IP "domain" 12
An opened access domain.
.IP "context" 12
User specified context associated with the domain. This context is returned as
part of any asynchronous event associated with the domain.
.IP "eq" 12
Event queue for asynchronous operations initiated on the domain.
.IP "name" 12
Name associated with an interface.
.IP "ops" 12
Fabric interface operations.
.SH "DESCRIPTION"
An access domain typically refers to a
physical or virtual NIC or hardware port; however, a domain may span across
multiple hardware components for fail-over or data striping purposes.
A domain defines the boundary for associating different resources
together. Fabric resources belonging to the same domain may share
resources.
.SS "fi_domain"
Opens a fabric access domain, also referred to as a resource domain.
Fabric domains are identified by a name. The properties of
the opened domain are specified using the info parameter.
.SS "fi_open_ops"
fi_open_ops is used to open provider specific interfaces.
Provider interfaces may be used to access low-level resources and operations
that are specific to the opened resource domain. The details of domain
interfaces are outside the scope of this documentation.
.SS "fi_domain_bind"
Associates an event queue with the domain. An event queue bound to a
domain will be the default EQ associated with
asynchronous control events that occur on the domain or active endpoints
allocated on a domain. This includes CM events. Endpoints may direct their
control events to alternate EQs by binding directly with the EQ.
.sp
Binding an event queue to a
domain with the FI_REG_MR flag indicates that the provider should perform
all memory registration operations asynchronously, with the completion
reported through the event queue. If an event queue is not bound to the
domain with the FI_REG_MR flag, then memory registration requests
complete synchronously.
.SS "fi_close"
The fi_close call is used to release all resources associated with a domain
or interface. All items associated with the opened domain must
be released prior to calling fi_close.
.SH "DOMAIN ATTRIBUTES"
The fi_domain_attr structure defines the set of attributes associated with a
domain.
.sp
fi_domain - Open a fabric access domain
.SH SYNOPSIS
.IP
.nf
struct fi_domain_attr {
struct fid_domain *domain;
char *name;
enum fi_threading threading;
enum fi_progress control_progress;
enum fi_progress data_progress;
size_t mr_key_size;
size_t cq_data_size;
size_t ep_cnt;
size_t tx_ctx_cnt;
size_t rx_ctx_cnt;
size_t max_ep_tx_ctx;
size_t max_ep_rx_ctx;
size_t op_size;
size_t iov_size;
};
\f[C]
#include\ <rdma/fabric.h>
#include\ <rdma/fi_domain.h>
int\ fi_domain(struct\ fid_fabric\ *fabric,\ struct\ fi_info\ *info,
\ \ \ \ struct\ fid_domain\ **domain,\ void\ *context);
int\ fi_close(struct\ fid\ *domain);
int\ fi_domain_bind(struct\ fid_domain\ *domain,\ struct\ fid\ *eq,
\ \ \ \ uint64_t\ flags);
int\ fi_open_ops(struct\ fid\ *domain,\ const\ char\ *name,\ uint64_t\ flags,
\ \ \ \ void\ **ops,\ void\ *context);
\f[]
.fi
.SS "domain"
On input to fi_getinfo, a user may set this to an opened domain instance to
restrict output to the given domain. On output from fi_getinfo, if no domain
was specified, but the user has an opened instance of the named domain, this will
reference the first opened instance. If no instance has been opened, this
field will be NULL.
.SS "Name"
.SH ARGUMENTS
.PP
\f[I]fabric\f[] : Fabric domain
.PP
\f[I]info\f[] : Fabric information, including domain capabilities and
attributes.
.PP
\f[I]domain\f[] : An opened access domain.
.PP
\f[I]context\f[] : User specified context associated with the domain.
This context is returned as part of any asynchronous event associated
with the domain.
.PP
\f[I]eq\f[] : Event queue for asynchronous operations initiated on the
domain.
.PP
\f[I]name\f[] : Name associated with an interface.
.PP
\f[I]ops\f[] : Fabric interface operations.
.SH DESCRIPTION
.PP
An access domain typically refers to a physical or virtual NIC or
hardware port; however, a domain may span across multiple hardware
components for fail-over or data striping purposes.
A domain defines the boundary for associating different resources
together.
Fabric resources belonging to the same domain may share resources.
.SS fi_domain
.PP
Opens a fabric access domain, also referred to as a resource domain.
Fabric domains are identified by a name.
The properties of the opened domain are specified using the info
parameter.
.SS fi_open_ops
.PP
fi_open_ops is used to open provider specific interfaces.
Provider interfaces may be used to access low-level resources and
operations that are specific to the opened resource domain.
The details of domain interfaces are outside the scope of this
documentation.
.SS fi_domain_bind
.PP
Associates an event queue with the domain.
An event queue bound to a domain will be the default EQ associated with
asynchronous control events that occur on the domain or active endpoints
allocated on a domain.
This includes CM events.
Endpoints may direct their control events to alternate EQs by binding
directly with the EQ.
.PP
Binding an event queue to a domain with the FI_REG_MR flag indicates
that the provider should perform all memory registration operations
asynchronously, with the completion reported through the event queue.
If an event queue is not bound to the domain with the FI_REG_MR flag,
then memory registration requests complete synchronously.
.SS fi_close
.PP
The fi_close call is used to release all resources associated with a
domain or interface.
All items associated with the opened domain must be released prior to
calling fi_close.
.SH DOMAIN ATTRIBUTES
.PP
The \f[C]fi_domain_attr\f[] structure defines the set of attributes
associated with a domain.
.IP
.nf
\f[C]
struct\ fi_domain_attr\ {
\ \ \ \ struct\ fid_domain\ *domain;
\ \ \ \ char\ \ \ \ \ \ \ \ \ \ \ \ \ \ *name;
\ \ \ \ enum\ fi_threading\ threading;
\ \ \ \ enum\ fi_progress\ \ control_progress;
\ \ \ \ enum\ fi_progress\ \ data_progress;
\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ mr_key_size;
\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ cq_data_size;
\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ cq_cnt;
\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ ep_cnt;
\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ tx_ctx_cnt;
\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ rx_ctx_cnt;
\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ max_ep_tx_ctx;
\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ max_ep_rx_ctx;
};
\f[]
.fi
.SS domain
.PP
On input to fi_getinfo, a user may set this to an opened domain instance
to restrict output to the given domain.
On output from fi_getinfo, if no domain was specified, but the user has
an opened instance of the named domain, this will reference the first
opened instance.
If no instance has been opened, this field will be NULL.
.SS Name
.PP
The name of the access domain.
.SS "Multi-threading Support (threading)"
.SS Multi-threading Support (threading)
.PP
The threading model specifies the level of serialization required of an
application when using the libfabric data transfer interfaces. Control
interfaces are always considered thread safe, and may be accessed by
multiple threads. Applications which can guarantee serialization in their
access of provider allocated resources and interfaces enables a provider
to eliminate lower-level locks.
.IP "FI_THREAD_UNSPEC"
This value indicates that no threading model has been defined. It may be
used on input hints to the fi_getinfo call.
.IP "FI_THREAD_SAFE"
A thread safe serialization model allows a multi-threaded application to
access any allocated resources through any interface without restriction.
application when using the libfabric data transfer interfaces.
Control interfaces are always considered thread safe, and may be
accessed by multiple threads.
Applications which can guarantee serialization in their access of
provider allocated resources and interfaces enables a provider to
eliminate lower-level locks.
.PP
\f[I]FI_THREAD_UNSPEC\f[] : This value indicates that no threading model
has been defined.
It may be used on input hints to the fi_getinfo call.
.PP
\f[I]FI_THREAD_SAFE\f[] : A thread safe serialization model allows a
multi-threaded application to access any allocated resources through any
interface without restriction.
All providers are required to support FI_THREAD_SAFE.
.IP "FI_THREAD_PROGRESS"
A progress serialization model requires applications to serialize access
to provider resources and interfaces based on the progress model. For
providers with automatic progress, access to each endpoint must be serialized,
and access to each event queue, counter, wait or poll set must be serialized.
Serialization is required only by threads accessing the same object. For
example, one thread may be initiating a data transfer on an endpoint, while
another thread reads from an event queue associated with the endpoint.
Serialization to endpoint access is further limited to different endpoint
data flows, if available. Multiple threads may initiate transfers on the
same endpoint if they reference different data flows.
.sp
For providers with manual progress, applications must serialize their access
to any object that is part of a single progress domain. A progress domain
is any set of associated endpoints, event queues, counters, wait sets, and
poll sets. For instance, endpoints that share the same event queue or poll
set belong to the same progress domain. Applications that can allocate
endpoint resources to specific threads can reduce provider locking by
using FI_THREAD_PROGRESS.
.SS "Progress Models (control_progress / data_progress)"
.PP
\f[I]FI_THREAD_PROGRESS\f[] : A progress serialization model requires
applications to serialize access to provider resources and interfaces
based on the progress model.
For providers with automatic progress, access to each endpoint must be
serialized, and access to each event queue, counter, wait or poll set
must be serialized.
Serialization is required only by threads accessing the same object.
For example, one thread may be initiating a data transfer on an
endpoint, while another thread reads from an event queue associated with
the endpoint.
Serialization to endpoint access is further limited to different
endpoint data flows, if available.
Multiple threads may initiate transfers on the same endpoint if they
reference different data flows.
.PP
For providers with manual progress, applications must serialize their
access to any object that is part of a single progress domain.
A progress domain is any set of associated endpoints, event queues,
counters, wait sets, and poll sets.
For instance, endpoints that share the same event queue or poll set
belong to the same progress domain.
Applications that can allocate endpoint resources to specific threads
can reduce provider locking by using FI_THREAD_PROGRESS.
.SS Progress Models (control_progress / data_progress)
.PP
Progress is the ability of the underlying implementation to complete
processing of an asynchronous request. In many cases, the processing of
an asynchronous request requires the use of the host processor. For example,
a received message may need to be matched with the correct buffer, or a timed
out request may need to be retransmitted. For performance reasons, it may be
undesirable for the provider to allocate a thread for this purpose, which
will compete with the application threads.
.sp
Control progress indicates the method that the provider uses to make progress
on asynchronous control operations. Control operations are functions
which do not directly involve the transfer of application data between
endpoints. They include address vector, memory registration, and
connection management routines.
.sp
Data progress indicates the method that the provider uses to make progress on
data transfer operations. This includes message queue, RMA, tagged messaging,
and atomic operations, along with their completion processing.
.sp
processing of an asynchronous request.
In many cases, the processing of an asynchronous request requires the
use of the host processor.
For example, a received message may need to be matched with the correct
buffer, or a timed out request may need to be retransmitted.
For performance reasons, it may be undesirable for the provider to
allocate a thread for this purpose, which will compete with the
application threads.
.PP
Control progress indicates the method that the provider uses to make
progress on asynchronous control operations.
Control operations are functions which do not directly involve the
transfer of application data between endpoints.
They include address vector, memory registration, and connection
management routines.
.PP
Data progress indicates the method that the provider uses to make
progress on data transfer operations.
This includes message queue, RMA, tagged messaging, and atomic
operations, along with their completion processing.
.PP
To balance between performance and ease of use, two progress models are
defined.
.IP "FI_PROGRESS_UNSPEC"
This value indicates that no progress model has been defined. It may be
used on input hints to the fi_getinfo call.
.IP "FI_PROGRESS_AUTO"
This progress model indicates that the provider will make forward progress
on an asynchronous operation without further intervention by the application.
When FI_PROGRESS_AUTO is provided as output to fi_getinfo in the absence of
any progress hints, it often indicates that the desired functionality is
implemented by the provider hardware or is a standard service of
the operating system.
.sp
All providers are required to support FI_PROGRESS_AUTO. However, if a
provider does not natively support automatic progress, forcing the use of
FI_PROGRESS_AUTO may result in threads being allocated below the fabric
interfaces.
.IP "FI_PROGRESS_MANUAL"
This progress model indicates that the provider requires the use of an
application thread to complete an asynchronous request. When manual
progress is set, the provider will attempt to advance an asynchronous
operation forward when the application invokes any event queue read or
wait operation where the completion will be reported. Progress also
occurs when the application processes a poll or wait set.
.sp
.PP
\f[I]FI_PROGRESS_UNSPEC\f[] : This value indicates that no progress
model has been defined.
It may be used on input hints to the fi_getinfo call.
.PP
\f[I]FI_PROGRESS_AUTO\f[] : This progress model indicates that the
provider will make forward progress on an asynchronous operation without
further intervention by the application.
When FI_PROGRESS_AUTO is provided as output to fi_getinfo in the absence
of any progress hints, it often indicates that the desired functionality
is implemented by the provider hardware or is a standard service of the
operating system.
.PP
All providers are required to support FI_PROGRESS_AUTO.
However, if a provider does not natively support automatic progress,
forcing the use of FI_PROGRESS_AUTO may result in threads being
allocated below the fabric interfaces.
.PP
\f[I]FI_PROGRESS_MANUAL\f[] : This progress model indicates that the
provider requires the use of an application thread to complete an
asynchronous request.
When manual progress is set, the provider will attempt to advance an
asynchronous operation forward when the application invokes any event
queue read or wait operation where the completion will be reported.
Progress also occurs when the application processes a poll or wait set.
.PP
Only wait operations defined by the fabric interface will result in an
operation progressing. Operating system or external wait functions,
such as select, poll, or pthread routines, cannot.
.SS "MR Key Size"
Size of the memory region remote access key, in bytes. Applications that
request their own MR key must select a value within the range specified by
this value.
.SS "CQ Data Size"
The number of bytes that the provider supports for remote CQ data. See
the FI_REMOTE_CQ_DATA flag (fi_getinfo) for the use of remote CQ data.
.SS "Endpoint Count (ep_cnt)"
operation progressing.
Operating system or external wait functions, such as select, poll, or
pthread routines, cannot.
.SS MR Key Size
.PP
Size of the memory region remote access key, in bytes.
Applications that request their own MR key must select a value within
the range specified by this value.
.SS CQ Data Size
.PP
The number of bytes that the provider supports for remote CQ data.
See the FI_REMOTE_CQ_DATA flag (fi_getinfo) for the use of remote CQ
data.
.SS Completion Queue Count (cq_cnt)
.PP
The total number of completion queues supported by the domain, relative
to any specified or default CQ attributes.
The cq_cnt value may be a fixed value of the maximum number of CQs
supported by the underlying provider, or may be a dynamic value, based
on the default attributes of an allocated CQ, such as the CQ size and
data format.
.SS Endpoint Count (ep_cnt)
.PP
The total number of endpoints supported by the domain, relative to any
specified or default endpoint attributes. The ep_cnt value may be a
fixed value of the maximum number of endpoints supported by the underlying
provider, or may be a dynamic value, based on the default attributes of
an allocated endpoint, such as the endpoint capabilities and size. The
endpoint count is the number of addressable endpoints supported by the
specified or default endpoint attributes.
The ep_cnt value may be a fixed value of the maximum number of endpoints
supported by the underlying provider, or may be a dynamic value, based
on the default attributes of an allocated endpoint, such as the endpoint
capabilities and size.
The endpoint count is the number of addressable endpoints supported by
the provider.
.SS Transmit Context Count (tx_ctx_cnt)
.PP
The number of outbound command queues optimally supported by the
provider.
For a low-level provider, this represents the number of command queues to
the hardware and/or the number of parallel transmit engines effectively
supported by the hardware and caches.
Applications which allocate more transmit contexts than this value will
end up sharing underlying resources.
By default, there is a single transmit context associated with each
endpoint, but in an advanced usage model, an endpoint may be configured
with multiple transmit contexts.
.SS Receive Context Count (rx_ctx_cnt)
.PP
The number of inbound processing queues optimally supported by the
provider.
.SS "Transmit Context Count (tx_ctx_cnt)"
The number of outbound command queues optimally supported by the provider.
For a low-level provider, this represents the number of command queues to the
hardware and/or the number of parallel transmit engines effectively
supported by the hardware and caches. Applications which allocate more
transmit contexts than this value will end up sharing underlying resources.
By default, there is a single transmit context associated with each endpoint,
but in an advanced usage model, an endpoint may be configured with multiple
transmit contexts.
.SS "Receive Context Count (rx_ctx_cnt)"
The number of inbound processing queues optimally supported by the provider.
For a low-level provider, this represents the number of hardware queues
that can be effectively utilized for processing incoming packets.
Applications which allocate more receive contexts than this value
will end up sharing underlying resources.
Applications which allocate more receive contexts than this value will
end up sharing underlying resources.
By default, a single receive context is associated with each endpoint,
but in an advanced usage model, an endpoint may be configured with multiple
receive contexts.
.SS "Maximum Endpoint Transmit Context (max_ep_tx_ctx)"
but in an advanced usage model, an endpoint may be configured with
multiple receive contexts.
.SS Maximum Endpoint Transmit Context (max_ep_tx_ctx)
.PP
The maximum number of transmit contexts that may be associated with an
endpoint.
.SS "Maximum Endpoint Receive Context (max_ep_rx_ctx)"
.SS Maximum Endpoint Receive Context (max_ep_rx_ctx)
.PP
The maximum number of receive contexts that may be associated with an
endpoint.
.SS "Operation Size (op_size)"
This is an output value. It corresponds to the amount of transmit
or receive context memory that is consumed for each posted request.
It may be used by applications that manage their context usage
to determine if a local context can queue an additional operation.
See fi_endpoint(3) for further discussion.
.SS "IO Vector Size (iov_size)"
This is an output value. It corresponds to the amount of transmit
or receive context memory that is consumed for each IO vector (e.g. SGE)
that is referenced by a posted request. It may be used by applications
that manage their context usage to determine if a local context can
queue an additional operation.
See fi_endpoint(3) for further discussion.
.SH "RETURN VALUE"
Returns 0 on success. On error, a negative value corresponding to fabric
errno is returned. Fabric errno values are defined in
.IR "rdma/fi_errno.h".
.SH "NOTES"
Users should call fi_close to release all resources allocated to the fabric
domain.
.SH RETURN VALUE
.PP
Returns 0 on success.
On error, a negative value corresponding to fabric errno is returned.
Fabric errno values are defined in \f[C]rdma/fi_errno.h\f[].
.SH NOTES
.PP
Users should call fi_close to release all resources allocated to the
fabric domain.
.PP
The following fabric resources are associated with access domains:
active endpoints, memory regions, completion event queues, and address
vectors.
.SH "SEE ALSO"
fi_getinfo(3), fi_endpoint(3), fi_eq(3), fi_av(3), fi_mr(3)
.SH SEE ALSO
.PP
\f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_av\f[](3),
\f[C]fi_eq\f[](3), \f[C]fi_mr\f[](3)
.SH AUTHORS
OpenFabrics.

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1 +0,0 @@
.so man3/fi_endpoint.3

Просмотреть файл

@ -1,325 +1,357 @@
.TH "FI_EQ" 3 "2014-11-11" "Libfabric v0.0.2" "Libfabric Programmer's Manual" libfabric
.TH fi_eq 3 "2014\-12\-03" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
fi_eq \- Event queue operations
.PP
fi_eq_open / fi_close
.RS
Open/close an event queue
.RE
fi_eq - Event queue operations
.PP
fi_eq_read / fi_eq_readerr
.RS
Read an event from an event queue
.RE
fi_eq_open / fi_close : Open/close an event queue
.PP
fi_eq_write
.RS
Writes an event to an event queue
.RE
fi_eq_read / fi_eq_readerr : Read an event from an event queue
.PP
fi_eq_sread
.RS
A synchronous (blocking) read of an event queue
.RE
fi_eq_write : Writes an event to an event queue
.PP
fi_eq_strerror
.RS
Converts provider specific error information into a printable string
.RE
fi_eq_sread : A synchronous (blocking) read of an event queue
.PP
fi_eq_strerror : Converts provider specific error information into a
printable string
.SH SYNOPSIS
.B #include <rdma/fi_domain.h>
.HP
.BI "int fi_eq_open(struct fid_fabric *" fabric ", struct fi_eq_attr *" attr ", "
.BI "struct fid_eq **" eq ", void * " context ");"
.HP
.BI "int fi_close(struct fid *" eq ");"
.HP
.BI "int fi_control(struct fid *" eq ", int " command ", void *" arg ");"
.PP
.HP
.BI "int fi_eq_read(struct fid_eq *" eq ", uint32_t *" event ", "
.BI "void *" buf ", size_t " len ", uint64_t " flags ");"
.HP
.BI "int fi_eq_readerr(struct fid_eq *" eq ", "
.BI "struct fi_eq_err_entry *" buf ", size_t " len ", "
.BI "uint64_t " flags ");"
.PP
.HP
.BI "int fi_eq_write(struct fid_eq *" eq ", uint32_t " event ", "
.BI "const void *" buf ", size_t " len ", uint64_t " flags ");"
.PP
.HP
.BI "int fi_eq_sread(struct fid_eq *" eq ", uint32_t *" event ", "
.BI "void *" buf ", size_t " len ", "
.BI "int " timeout ", uint64_t " flags ");"
.PP
.HP
.BI "const char * fi_eq_strerror(struct fid_eq *" eq ", int " prov_errno ", "
.BI "const void *" err_data ", void *" buf ", size_t" len ");"
.SH ARGUMENTS
.IP "fabric"
Opened fabric descriptor
.IP "domain"
Open resource domain
.IP "eq"
Event queue
.IP "attr"
Event queue attributes
.IP "context"
User specified context associated with the event queue.
.IP "event"
Reported event
.IP "buf"
For read calls, the data buffer to write events into.
.br
For write calls, an event to insert into the event queue.
.br
For fi_eq_strerror, an optional buffer that receives printable error information.
.IP "len"
Length of data buffer
.IP "flags"
Additional flags to apply to the operation
.IP "prov_errno"
Provider specific error value
.IP "err_data"
Provider specific error data related to a completion
.IP "timeout"
Timeout specified in milliseconds
.SH "DESCRIPTION"
Event queues are used to report events associated with control operations.
They are associated with memory registration, address vectors, connection
management, and fabric and domain level events. Reported events are
either associated with a requested operation or affiliated with a
call that registers for specific
types of events, such as listening for connection requests.
.SS "fi_eq_open"
fi_eq_open allocates a new event queue.
.PP
The properties and behavior of an event queue are defined by struct fi_eq_attr.
.PP
.IP
.nf
\f[C]
#include\ <rdma/fi_domain.h>
struct fi_eq_attr {
size_t size; /* # entries for EQ */
uint64_t flags; /* operation flags */
enum fi_wait_obj wait_obj; /* requested wait object */
int signaling_vector; /* interrupt affinity */
struct fid_wait *wait_set; /* optional wait set */
};
int\ fi_eq_open(struct\ fid_fabric\ *fabric,\ struct\ fi_eq_attr\ *attr,
\ \ \ \ struct\ fid_eq\ **eq,\ void\ *context);
int\ fi_close(struct\ fid\ *eq);
int\ fi_control(struct\ fid\ *eq,\ int\ command,\ void\ *arg);
int\ fi_eq_read(struct\ fid_eq\ *eq,\ uint32_t\ *event,
\ \ \ \ void\ *buf,\ size_t\ len,\ uint64_t\ flags);
int\ fi_eq_readerr(struct\ fid_eq\ *eq,\ struct\ fi_eq_err_entry\ *buf,
\ \ \ \ size_t\ len,\ uint64_t\ flags);
int\ fi_eq_write(struct\ fid_eq\ *eq,\ uint32_t\ event,
\ \ \ \ const\ void\ *buf,\ size_t\ len,\ uint64_t\ flags);
int\ fi_eq_sread(struct\ fid_eq\ *eq,\ uint32_t\ *event,
\ \ \ \ void\ *buf,\ size_t\ len,\ int\ timeout,\ uint64_t\ flags);
const\ char\ *\ fi_eq_strerror(struct\ fid_eq\ *eq,\ int\ prov_errno,
\ \ \ \ \ \ const\ void\ *err_data,\ void\ *buf,\ size_t\ len);
\f[]
.fi
.IP "size"
Specifies the minimum size of an event queue.
.IP "flags"
Flags that control the configuration of the EQ.
.RS
.IP "FI_WRITE"
Indicates that the application requires support for inserting user events
into the EQ. If this flag is set, then the fi_eq_write operation must be
supported by the provider. If the FI_WRITE flag is not set, then the
application may not invoke fi_eq_write.
.RE
.IP "wait_obj"
EQ's may be associated with a specific wait object. Wait objects allow
applications to block until the wait object is signaled, indicating that
an event is available to be read. Users may use fi_control to retrieve
the underlying wait object associated with an EQ, in order to use it in
other system calls. The following values may be used to specify the type
of wait object associated with an EQ:
.RS
.IP "FI_WAIT_NONE"
Used to indicate that the user will not block (wait) for events on the EQ.
When FI_WAIT_NONE is specified, the application may not call fi_eq_sread.
.IP "FI_WAIT_UNSPEC"
Specifies that the user will only wait on the EQ using fabric interface
calls, such as fi_eq_sread. In this case, the underlying provider may
select the most appropriate or highest performing wait object available,
including custom wait mechanisms. Applications that select
FI_WAIT_UNSPEC are not guaranteed to retrieve the underlying wait
.SH ARGUMENTS
.PP
\f[I]fabric\f[] : Opened fabric descriptor
.PP
\f[I]domain\f[] : Open resource domain
.PP
\f[I]eq\f[] : Event queue
.PP
\f[I]attr\f[] : Event queue attributes
.PP
\f[I]context\f[] : User specified context associated with the event
queue.
.PP
\f[I]event\f[] : Reported event
.PP
\f[I]buf\f[] : For read calls, the data buffer to write events into.
For write calls, an event to insert into the event queue.
For fi_eq_strerror, an optional buffer that receives printable error
information.
.PP
\f[I]len\f[] : Length of data buffer
.PP
\f[I]flags\f[] : Additional flags to apply to the operation
.PP
\f[I]prov_errno\f[] : Provider specific error value
.PP
\f[I]err_data\f[] : Provider specific error data related to a completion
.PP
\f[I]timeout\f[] : Timeout specified in milliseconds
.SH DESCRIPTION
.PP
Event queues are used to report events associated with control
operations.
They are associated with memory registration, address vectors,
connection management, and fabric and domain level events.
Reported events are either associated with a requested operation or
affiliated with a call that registers for specific types of events, such
as listening for connection requests.
.SS fi_eq_open
.PP
fi_eq_open allocates a new event queue.
.PP
The properties and behavior of an event queue are defined by
\f[C]struct\ fi_eq_attr\f[].
.IP
.nf
\f[C]
struct\ fi_eq_attr\ {
\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ size;\ \ \ \ \ \ /*\ #\ entries\ for\ EQ\ */
\ \ \ \ uint64_t\ \ \ \ \ \ \ \ \ \ \ \ \ flags;\ \ \ \ \ /*\ operation\ flags\ */
\ \ \ \ enum\ fi_wait_obj\ \ \ \ \ wait_obj;\ \ /*\ requested\ wait\ object\ */
\ \ \ \ int\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ signaling_vector;\ /*\ interrupt\ affinity\ */
\ \ \ \ struct\ fid_wait\ \ \ \ \ *wait_set;\ \ /*\ optional\ wait\ set\ */
};
\f[]
.fi
.PP
\f[I]size\f[] : Specifies the minimum size of an event queue.
.PP
\f[I]flags\f[] : Flags that control the configuration of the EQ.
.IP \[bu] 2
\f[I]FI_WRITE\f[] : Indicates that the application requires support for
inserting user events into the EQ.
If this flag is set, then the fi_eq_write operation must be supported by
the provider.
If the FI_WRITE flag is not set, then the application may not invoke
fi_eq_write.
.PP
\f[I]wait_obj\f[] : EQ\[aq]s may be associated with a specific wait
object.
.IP "FI_WAIT_SET"
Indicates that the event queue should use a wait set object to wait
for events. If specified, the wait_set field must reference an existing
wait set object.
.IP "FI_WAIT_FD"
Indicates that the EQ should use a file descriptor as its wait mechanism.
Wait objects allow applications to block until the wait object is
signaled, indicating that an event is available to be read.
Users may use fi_control to retrieve the underlying wait object
associated with an EQ, in order to use it in other system calls.
The following values may be used to specify the type of wait object
associated with an EQ:
.IP \[bu] 2
\f[I]FI_WAIT_NONE\f[] : Used to indicate that the user will not block
(wait) for events on the EQ.
When FI_WAIT_NONE is specified, the application may not call
fi_eq_sread.
.IP \[bu] 2
\f[I]FI_WAIT_UNSPEC\f[] : Specifies that the user will only wait on the
EQ using fabric interface calls, such as fi_eq_sread.
In this case, the underlying provider may select the most appropriate or
highest performing wait object available, including custom wait
mechanisms.
Applications that select FI_WAIT_UNSPEC are not guaranteed to retrieve
the underlying wait object.
.IP \[bu] 2
\f[I]FI_WAIT_SET\f[] : Indicates that the event queue should use a wait
set object to wait for events.
If specified, the wait_set field must reference an existing wait set
object.
.IP \[bu] 2
\f[I]FI_WAIT_FD\f[] : Indicates that the EQ should use a file descriptor
as its wait mechanism.
A file descriptor wait object must be usable in select, poll, and epoll
routines. However, a provider may signal an FD wait object by marking it
as readable, writable, or with an error.
.IP "FI_WAIT_MUT_COND"
Specifies that the EQ should use a pthread mutex and cond variable as a
wait object.
.RE
.IP "signaling_vector"
Indicates which processor core interrupts associated with the EQ should
target.
.IP "wait_set"
If wait_obj is FI_WAIT_SET, this field references a wait object to which the
event queue should attach. When an event is inserted into the event queue,
the corresponding wait set will be signaled if all necessary conditions are
met. The use of a wait_set enables an optimized method of waiting for events
across multiple event queues. This field is ignored if wait_obj is not
FI_WAIT_SET.
.SS "fi_close"
The fi_close call releases all resources associated with an event
queue. The EQ must not be bound to any other resources prior to
being closed. Any events which remain on the EQ when it is closed are
lost.
.SS "fi_control"
The fi_control call is used to access provider or implementation specific
details of the event queue. Access to the EQ should be serialized
across all calls when fi_control is invoked, as it may redirect the
implementation of EQ operations. The following control commands are usable
with an EQ.
.IP "FI_GETWAIT (void **)"
This command allows the user to retrieve the low-level wait object
associated with the EQ. The format of the wait-object is specified during
EQ creation, through the EQ attributes. The fi_control arg parameter
should be an address where a pointer to the returned wait object
will be written.
.SS "fi_eq_read"
The fi_eq_read operation performs a non-blocking read of
event data from the EQ. The format of the event data is based on the type
of event retrieved from the EQ, with all events starting with a
struct fi_eq_entry header. At most one event will be returned per EQ read
operation. The number of bytes successfully read from the EQ is returned
from the read. The FI_PEEK flag may be used to indicate that event
data should be read from the EQ without being consumed. A subsequent
read without the FI_PEEK flag would then remove the event from the EQ.
routines.
However, a provider may signal an FD wait object by marking it as
readable, writable, or with an error.
.IP \[bu] 2
\f[I]FI_WAIT_MUT_COND\f[] : Specifies that the EQ should use a pthread
mutex and cond variable as a wait object.
.PP
\f[I]signaling_vector\f[] : Indicates which processor core interrupts
associated with the EQ should target.
.PP
\f[I]wait_set\f[] : If wait_obj is FI_WAIT_SET, this field references a
wait object to which the event queue should attach.
When an event is inserted into the event queue, the corresponding wait
set will be signaled if all necessary conditions are met.
The use of a wait_set enables an optimized method of waiting for events
across multiple event queues.
This field is ignored if wait_obj is not FI_WAIT_SET.
.SS fi_close
.PP
The fi_close call releases all resources associated with an event queue.
The EQ must not be bound to any other resources prior to being closed.
Any events which remain on the EQ when it is closed are lost.
.SS fi_control
.PP
The fi_control call is used to access provider or implementation
specific details of the event queue.
Access to the EQ should be serialized across all calls when fi_control
is invoked, as it may redirect the implementation of EQ operations.
The following control commands are usable with an EQ.
.PP
\f[I]FI_GETWAIT (void **)\f[] : This command allows the user to retrieve
the low-level wait object associated with the EQ.
The format of the wait-object is specified during EQ creation, through
the EQ attributes.
The fi_control arg parameter should be an address where a pointer to the
returned wait object will be written.
.SS fi_eq_read
.PP
The fi_eq_read operation performs a non-blocking read of event data
from the EQ.
The format of the event data is based on the type of event retrieved
from the EQ, with all events starting with a struct fi_eq_entry header.
At most one event will be returned per EQ read operation.
The number of bytes successfully read from the EQ is returned from the
read.
The FI_PEEK flag may be used to indicate that event data should be read
from the EQ without being consumed.
A subsequent read without the FI_PEEK flag would then remove the event
from the EQ.
.PP
The following types of events may be reported to an EQ, along with
information regarding the format associated with each event.
.IP "Asynchronous Control Operations"
Asynchronous control operations are basic requests that simply need to
generate an event to indicate that they have completed. These include
the following types of events: memory registration, address vector resolution,
connection established, and multicast join.
.sp
Control requests report their completion by inserting a struct fi_eq_entry
into the EQ. The format of this structure is:
.PP
\f[I]Asynchronous Control Operations\f[] : Asynchronous control
operations are basic requests that simply need to generate an event to
indicate that they have completed.
These include the following types of events: memory registration,
address vector resolution, connection established, and multicast join.
.PP
Control requests report their completion by inserting a
\f[C]struct\ \ \ fi_eq_entry\f[] into the EQ.
The format of this structure is:
.IP
.nf
struct fi_eq_entry {
fid_t fid; /* fid associated with request */
void *context; /* operation context */
uint32_t data; /* completion dependent data */
\f[C]
struct\ fi_eq_entry\ {
\ \ \ \ fid_t\ \ \ \ \ \ \ \ \ \ \ \ fid;\ \ \ \ \ \ \ \ /*\ fid\ associated\ with\ request\ */
\ \ \ \ void\ \ \ \ \ \ \ \ \ \ \ \ *context;\ \ \ \ /*\ operation\ context\ */
\ \ \ \ uint32_t\ \ \ \ \ \ \ \ \ data;\ \ \ \ \ \ \ /*\ completion\ dependent\ data\ */
};
\f[]
.fi
For the completion of basic asynchronous control operations, the returned event
will be FI_COMPLETE. The fid will reference the fabric descriptor
associated with the event. For memory registration, this will be the fid_mr,
address resolution will reference a fid_av, and CM events will refer to a
fid_ep. The context field will be set to the context specified as part of
the operation.
.IP "Connection Request Notification"
Connection requests are unsolicited notifications that a remote endpoint
wishes to establish a new connection to a listening passive endpoint.
Connection requests are reported using struct fi_eq_cm_entry:
.PP
For the completion of basic asynchronous control operations, the
returned event will be FI_COMPLETE.
The fid will reference the fabric descriptor associated with the event.
For memory registration, this will be the fid_mr, address resolution
will reference a fid_av, and CM events will refer to a fid_ep.
The context field will be set to the context specified as part of the
operation.
.PP
\f[I]Connection Request Notification\f[] : Connection requests are
unsolicited notifications that a remote endpoint wishes to establish a
new connection to a listening passive endpoint.
Connection requests are reported using
\f[C]struct\ \ \ fi_eq_cm_entry\f[]:
.IP
.nf
struct fi_eq_cm_entry {
fid_t fid; /* fid associated with request */
struct fi_info *info; /* endpoint information */
uint8_t data[0]; /* app connection data */
\f[C]
struct\ fi_eq_cm_entry\ {
\ \ \ \ fid_t\ \ \ \ \ \ \ \ \ \ \ \ fid;\ \ \ \ \ \ \ \ /*\ fid\ associated\ with\ request\ */
\ \ \ \ struct\ fi_info\ \ *info;\ \ \ \ \ \ \ /*\ endpoint\ information\ */
\ \ \ \ uint8_t\ \ \ \ \ \ \ \ \ data[0];\ \ \ \ \ /*\ app\ connection\ data\ */
};
\f[]
.fi
Connection request events are of type FI_CONNREQ. The fid is the passive
endpoint. Information regarding the requested endpoint's capabilities and
attributes are available from the info field. The application is
responsible for freeing this structure by calling fi_freeinfo when it
is no longer needed. The fi_info connreq field will reference the
connection request associated with this event. For an accepted
connection, the connreq must be associated with an endpoint when
it is opened. Typically, this is done by simply passing the fi_info
returned as part of the CM event into fi_endpoint(). If the connection
is rejected, the connreq must be passed into the fi_reject call.
.sp
Any application data exchanged as part of the connection
request is placed beyond the fi_eq_cm_entry structure. The amount of data
available is application dependent and limited to the buffer space provided
by the application when fi_eq_read is called. The amount of returned data
may be calculated using the return value to fi_eq_read. Note that the amount
of returned data is limited by the underlying connection
protocol, and the length of any data returned may include protocol padding.
.PP
Connection request events are of type FI_CONNREQ.
The fid is the passive endpoint.
Information regarding the requested endpoint\[aq]s capabilities and
attributes are available from the info field.
The application is responsible for freeing this structure by calling
fi_freeinfo when it is no longer needed.
The fi_info connreq field will reference the connection request
associated with this event.
To accept a connection, an endpoint must first be created by passing an
fi_info structure referencing this connreq field to fi_endpoint().
This endpoint is then passed to fi_accept() to complete the acceptance
of the connection attempt.
Creating the endpoint is most easily accomplished by passing the fi_info
returned as part of the CM event into fi_endpoint().
If the connection is to be rejected, the connreq is passed to
fi_reject().
.PP
Any application data exchanged as part of the connection request is
placed beyond the fi_eq_cm_entry structure.
The amount of data available is application dependent and limited to the
buffer space provided by the application when fi_eq_read is called.
The amount of returned data may be calculated using the return value to
fi_eq_read.
Note that the amount of returned data is limited by the underlying
connection protocol, and the length of any data returned may include
protocol padding.
As a result, the returned length may be larger than that specified by
the connecting peer.
.IP "Connection Shutdown Notification"
Notification that a remote peer has disconnected from an active endpoint is
done through the FI_SHUTDOWN event. Shutdown notification uses struct
fi_eq_entry as declared above. The fid field for a shutdown notification
refers to the active endpoint's fid_ep. The context field is set to NULL.
.SS "fi_eq_sread"
The fi_eq_sread call is the blocking (or synchronous) equivalent to fi_eq_read.
It behaves similarly to
the non-blocking call, with the exception that the calls will not return
until either an event has been read from the EQ or an error or timeout occurs.
.PP
\f[I]Connection Shutdown Notification\f[] : Notification that a remote
peer has disconnected from an active endpoint is done through the
FI_SHUTDOWN event.
Shutdown notification uses struct fi_eq_entry as declared above.
The fid field for a shutdown notification refers to the active
endpoint\[aq]s fid_ep.
The context field is set to NULL.
.SS fi_eq_sread
.PP
The fi_eq_sread call is the blocking (or synchronous) equivalent to
fi_eq_read.
It behaves similarly to the non-blocking call, with the exception that
the calls will not return until either an event has been read from the
EQ or an error or timeout occurs.
Specifying a negative timeout means an infinite timeout.
.SS "fi_eq_readerr"
.SS fi_eq_readerr
.PP
The read error function, fi_eq_readerr, retrieves information regarding
any asynchronous operation which has completed with an unexpected error.
fi_eq_readerr is a non-blocking call, returning immediately whether an
error completion was found or not.
.PP
EQs are optimized to report operations which have completed successfully.
Operations which fail are reported 'out of band'. Such operations are
retrieved using the fi_eq_readerr function. When an operation
that completes with an unexpected error is inserted
into an EQ, it is placed into a temporary error queue. Attempting to read
from an EQ while an item is in the error queue results in an FI_EAVAIL
failure. Applications may use this return code to determine when to
call fi_eq_readerr.
EQs are optimized to report operations which have completed
successfully.
Operations which fail are reported \[aq]out of band\[aq].
Such operations are retrieved using the fi_eq_readerr function.
When an operation that completes with an unexpected error is inserted
into an EQ, it is placed into a temporary error queue.
Attempting to read from an EQ while an item is in the error queue
results in an FI_EAVAIL failure.
Applications may use this return code to determine when to call
fi_eq_readerr.
.PP
Error information is reported to the user through struct fi_eq_err_entry.
Error information is reported to the user through struct
fi_eq_err_entry.
The format of this structure is defined below.
.IP
.nf
struct fi_eq_err_entry {
fid_t fid; /* fid associated with error */
void *context; /* operation context */
uint32_t index; /* index for vector ops */
int err; /* positive error code */
int prov_errno; /* provider error code */
void *err_data; /* additional error data */
\f[C]
struct\ fi_eq_err_entry\ {
\ \ \ \ fid_t\ \ \ \ \ \ \ \ \ \ \ \ fid;\ \ \ \ \ \ \ \ /*\ fid\ associated\ with\ error\ */
\ \ \ \ void\ \ \ \ \ \ \ \ \ \ \ \ *context;\ \ \ \ /*\ operation\ context\ */
\ \ \ \ uint32_t\ \ \ \ \ \ \ \ \ index;\ \ \ \ \ \ /*\ index\ for\ vector\ ops\ */
\ \ \ \ int\ \ \ \ \ \ \ \ \ \ \ \ \ \ err;\ \ \ \ \ \ \ \ /*\ positive\ error\ code\ */
\ \ \ \ int\ \ \ \ \ \ \ \ \ \ \ \ \ \ prov_errno;\ /*\ provider\ error\ code\ */
\ \ \ \ void\ \ \ \ \ \ \ \ \ \ \ \ *err_data;\ \ \ /*\ additional\ error\ data\ */
};
\f[]
.fi
The fid will reference the fabric descriptor
associated with the event. For memory registration, this will be the fid_mr,
address resolution will reference a fid_av, and CM events will refer to a
fid_ep. The context field will be set to the context specified as part of
the operation.
.sp
The general reason for the error is provided through the err field. Provider
specific error information may also be available through the prov_errno
and err_data fields. Users may call fi_eq_strerror to convert provider
specific error information into a printable string for debugging purposes.
.SH "RETURN VALUES"
fi_eq_open
.RS
Returns 0 on success. On error, a negative value corresponding to
fabric errno is returned.
.RE
.PP
The fid will reference the fabric descriptor associated with the event.
For memory registration, this will be the fid_mr, address resolution
will reference a fid_av, and CM events will refer to a fid_ep.
The context field will be set to the context specified as part of the
operation.
.PP
The general reason for the error is provided through the err field.
Provider specific error information may also be available through the
prov_errno and err_data fields.
Users may call fi_eq_strerror to convert provider specific error
information into a printable string for debugging purposes.
.SH RETURN VALUES
.PP
fi_eq_open : Returns 0 on success.
On error, a negative value corresponding to fabric errno is returned.
.PP
fi_eq_read / fi_eq_readerr
.br
.PD 0
.P
.PD
fi_eq_sread
.br
fi_eq_write
.RS
On success, returns the number of bytes read from or written to the
event queue. On error, a negative value corresponding to fabric errno
is returned. On timeout, fi_eq_sread returns -FI_ETIMEDOUT.
.RE
.PD 0
.P
.PD
fi_eq_write : On success, returns the number of bytes read from or
written to the event queue.
On error, a negative value corresponding to fabric errno is returned.
On timeout, fi_eq_sread returns -FI_ETIMEDOUT.
.PP
fi_eq_strerror
.RS
Returns a character string interpretation of the provider specific error
returned with a completion.
.RE
fi_eq_strerror : Returns a character string interpretation of the
provider specific error returned with a completion.
.PP
Fabric errno values are defined in
.IR "rdma/fi_errno.h".
.SH "SEE ALSO"
fi_getinfo(3), fi_endpoint(3), fi_domain(3), fi_cntr(3), fi_poll(3)
Fabric errno values are defined in \f[C]rdma/fi_errno.h\f[].
.SH SEE ALSO
.PP
\f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3),
\f[C]fi_cntr\f[](3), \f[C]fi_poll\f[](3)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -1,114 +1,139 @@
.TH "FI_FABRIC" 3 "2014-11-11" "Libfabric v0.0.2" "Libfabric Programmer's Manual" libfabric
.TH fi_fabric 3 "2014-11-14" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
fi_fabric \- Fabric domain operations
.PP
fi_fabric / fi_close
.RS
Open / close a fabric domain
.RE
fi_fabric - Fabric domain operations
.PP
fi_tostr
.RS
Convert fabric attributes, flags, and capabilities to printable string
.RE
fi_fabric / fi_close : Open / close a fabric domain
.PP
fi_tostr : Convert fabric attributes, flags, and capabilities to
printable string
.SH SYNOPSIS
.B "#include <rdma/fabric.h>"
.HP
.BI "int fi_fabric(struct fi_fabric_attr *" attr ","
.BI "struct fid_fabric **" fabric ", void *" context ");"
.HP
.BI "int fi_close(struct fid *" fabric ");"
.HP
.BI "char * fi_tostr(const void *" data ", enum fi_type " datatype ");"
.SH ARGUMENTS
.IP "attr" 12
Attributes of fabric to open.
.IP "fabric" 12
Fabric domain
.IP "context" 12
User specified context associated with the opened object. This context is
returned as part of any associated asynchronous event.
.SH "DESCRIPTION"
A fabric domain represents a collection of hardware and software resources
that access a single physical or virtual network. All network ports on a
system that can communicate with each other through their attached
networks belong to the same fabric domain. A fabric domain shares
network addresses and can span multiple providers.
.SS "fi_fabric"
Opens a fabric provider. The attributes of the fabric provider are
specified through the open call, and may be obtained by calling fi_getinfo.
.SS "fi_close"
The fi_close call is used to release all resources associated with a fabric
domain or interface. All items associated with the opened fabric must
be released prior to calling fi_close.
.SS "fi_tostr"
Converts fabric interface attributes, capabilities, flags, and enum values
into a printable string. The data parameter accepts a pointer to the
attribute or value(s) to display, with the datatype parameter indicating
the type of data referenced by the data parameter. Valid values for the
datatype are listed below, along with the corresponding datatype or field
value.
.IP "FI_TYPE_INFO"
struct fi_info
.IP "FI_TYPE_EP_TYPE"
struct fi_info::type field
.IP "FI_TYPE_EP_CAP"
struct fi_info::ep_cap field
.IP "FI_TYPE_OP_FLAGS"
struct fi_info::op_flags field, or general uint64_t flags
.IP "FI_TYPE_ADDR_FORMAT"
struct fi_info::addr_format field
.IP "FI_TYPE_TX_ATTR"
struct fi_tx_ctx_attr
.IP "FI_TYPE_RX_ATTR"
struct fi_rx_ctx_attr
.IP "FI_TYPE_EP_ATTR"
struct fi_ep_attr
.IP "FI_TYPE_DOMAIN_ATTR"
struct fi_domain_attr
.IP "FI_TYPE_FABRIC_ATTR"
struct fi_fabric_attr
.IP "FI_TYPE_DOMAIN_CAP"
struct fi_info::domain_cap field
.IP "FI_TYPE_THREADING"
enum fi_threading
.IP "FI_TYPE_PROGRESS"
enum fi_progress
.IP "FI_TYPE_PROTO"
struct fi_ep_attr::protocol field
.IP "FI_TYPE_MSG_ORDER"
struct fi_ep_attr::msg_order field
.SH "NOTES"
The following resources are associated with fabric domains: access domains,
passive endpoints, and CM event queues.
.SH "FABRIC ATTRIBUTES"
The fi_fabric_attr structure defines the set of attributes associated with a
fabric and a fabric provider.
.sp
.IP
.nf
struct fi_fabric_attr {
struct fid_fabric *fabric;
char *name;
char *prov_name;
uint32_t prov_version;
};
\f[C]
#include\ <rdma/fabric.h>
int\ fi_fabric(struct\ fi_fabric_attr\ *attr,
\ \ \ \ struct\ fid_fabric\ **fabric,\ void\ *context);
int\ fi_close(struct\ fid\ *fabric);
char\ *\ fi_tostr(const\ void\ *data,\ enum\ fi_type\ datatype);
\f[]
.fi
.SS "fabric"
On input to fi_getinfo, a user may set this to an opened fabric instance to
restrict output to the given fabric. On output from fi_getinfo, if no fabric
was specified, but the user has an opened instance of the named fabric, this will
reference the first opened instance. If no instance has been opened, this
field will be NULL.
.SS "Name"
.SH ARGUMENTS
.PP
\f[I]attr\f[] : Attributes of fabric to open.
.PP
\f[I]fabric\f[] : Fabric domain
.PP
\f[I]context\f[] : User specified context associated with the opened
object.
This context is returned as part of any associated asynchronous event.
.SH DESCRIPTION
.PP
A fabric domain represents a collection of hardware and software
resources that access a single physical or virtual network.
All network ports on a system that can communicate with each other
through their attached networks belong to the same fabric domain.
A fabric domain shares network addresses and can span multiple
providers.
.SS fi_fabric
.PP
Opens a fabric provider.
The attributes of the fabric provider are specified through the open
call, and may be obtained by calling fi_getinfo.
.SS fi_close
.PP
The fi_close call is used to release all resources associated with a
fabric domain or interface.
All items associated with the opened fabric must be released prior to
calling fi_close.
.SS fi_tostr
.PP
Converts fabric interface attributes, capabilities, flags, and enum
values into a printable string.
The data parameter accepts a pointer to the attribute or value(s) to
display, with the datatype parameter indicating the type of data
referenced by the data parameter.
Valid values for the datatype are listed below, along with the
corresponding datatype or field value.
.PP
\f[I]FI_TYPE_INFO\f[] : struct fi_info
.PP
\f[I]FI_TYPE_EP_TYPE\f[] : struct fi_info::type field
.PP
\f[I]FI_TYPE_EP_CAP\f[] : struct fi_info::ep_cap field
.PP
\f[I]FI_TYPE_OP_FLAGS\f[] : struct fi_info::op_flags field, or general
uint64_t flags
.PP
\f[I]FI_TYPE_ADDR_FORMAT\f[] : struct fi_info::addr_format field
.PP
\f[I]FI_TYPE_TX_ATTR\f[] : struct fi_tx_ctx_attr
.PP
\f[I]FI_TYPE_RX_ATTR\f[] : struct fi_rx_ctx_attr
.PP
\f[I]FI_TYPE_EP_ATTR\f[] : struct fi_ep_attr
.PP
\f[I]FI_TYPE_DOMAIN_ATTR\f[] : struct fi_domain_attr
.PP
\f[I]FI_TYPE_FABRIC_ATTR\f[] : struct fi_fabric_attr
.PP
\f[I]FI_TYPE_DOMAIN_CAP\f[] : struct fi_info::domain_cap field
.PP
\f[I]FI_TYPE_THREADING\f[] : enum fi_threading
.PP
\f[I]FI_TYPE_PROGRESS\f[] : enum fi_progress
.PP
\f[I]FI_TYPE_PROTO\f[] : struct fi_ep_attr::protocol field
.PP
\f[I]FI_TYPE_MSG_ORDER\f[] : struct fi_ep_attr::msg_order field
.SH NOTES
.PP
The following resources are associated with fabric domains: access
domains, passive endpoints, and CM event queues.
.SH FABRIC ATTRIBUTES
.PP
The fi_fabric_attr structure defines the set of attributes associated
with a fabric and a fabric provider.
.IP
.nf
\f[C]
struct\ fi_fabric_attr\ {
\ \ \ \ struct\ fid_fabric\ *fabric;
\ \ \ \ char\ \ \ \ \ \ \ \ \ \ \ \ \ \ *name;
\ \ \ \ char\ \ \ \ \ \ \ \ \ \ \ \ \ \ *prov_name;
\ \ \ \ uint32_t\ \ \ \ \ \ \ \ \ \ prov_version;
};
\f[]
.fi
.SS fabric
.PP
On input to fi_getinfo, a user may set this to an opened fabric instance
to restrict output to the given fabric.
On output from fi_getinfo, if no fabric was specified, but the user has
an opened instance of the named fabric, this will reference the first
opened instance.
If no instance has been opened, this field will be NULL.
.SS name
.PP
A fabric identifier.
.SS "Provider Name"
.SS prov_name
.PP
The name of the underlying fabric provider.
.SS "Provider Version"
.SS prov_version
.PP
Version information for the fabric provider.
.SH "RETURN VALUE"
Returns 0 on success. On error, a negative value corresponding to fabric
errno is returned. Fabric errno values are defined in
.IR "rdma/fi_errno.h".
.SH "ERRORS"
.SH "SEE ALSO"
fabric(7), fi_getinfo(3), fi_domain(3), fi_eq(3), fi_endpoint(3)
.SH RETURN VALUE
.PP
Returns 0 on success.
On error, a negative value corresponding to fabric errno is returned.
Fabric errno values are defined in \f[C]rdma/fi_errno.h\f[].
.SH ERRORS
.SH SEE ALSO
.PP
\f[C]fi_fabric\f[](7), \f[C]fi_getinfo\f[](3), \f[C]fi_domain\f[](3),
\f[C]fi_eq\f[](3), \f[C]fi_endpoint\f[](3)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -1 +0,0 @@
.so man3/fi_atomic.3

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1 +0,0 @@
.so man3/fi_msg.3

Просмотреть файл

@ -1,255 +1,291 @@
.TH "FI_MR" 3 "2014-11-11" "Libfabric v0.0.2" "Libfabric Programmer's Manual" libfabric
.TH fi_mr 3 "2014-11-14" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
fi_mr \- Memory region operations
.PP
fi_mr_reg / fi_mr_regv / fi_mr_regattr
.RS
Register local memory buffers for direct fabric access
.RE
fi_mr - Memory region operations
.PP
fi_close
.RS
Deregister registered memory buffers.
.RE
fi_mr_reg / fi_mr_regv / fi_mr_regattr : Register local memory buffers
for direct fabric access
.PP
fi_mr_desc
.RS
Return a local descriptor associated with a registered memory region
.RE
fi_close : Deregister registered memory buffers.
.PP
fi_mr_key
.RS
Return the remote key needed to access a registered memory region
.RE
fi_mr_desc : Return a local descriptor associated with a registered
memory region
.PP
fi_mr_bind
.RS
Associate a registered memory region with an event collector.
.RE
fi_mr_key : Return the remote key needed to access a registered memory
region
.PP
fi_mr_bind : Associate a registered memory region with an event
collector.
.SH SYNOPSIS
.B #include <rdma/fi_domain.h>
.HP
.BI "int fi_mr_reg(struct fid_domain *" domain ", "
.BI "const void * " buf ", size_t " len ", "
.BI "uint64_t " access ", uint64_t " offset ", uint64_t " requested_key ", "
.BI "uint64_t " flags ", struct fid_mr **" mr ", void *" context ");"
.HP
.BI "int fi_mr_regv(struct fid_domain *" domain ", "
.BI "const struct iovec * " iov ", size_t " count ", "
.BI "uint64_t " access ", uint64_t " offset ", uint64_t " requested_key ", "
.BI "uint64_t " flags ", struct fid_mr **" mr ", void *" context ");"
.HP
.BI "int fi_mr_regattr(struct fid_domain *" domain ", "
.BI "const struct fi_mr_attr * " attr ","
.BI "uint64_t " flags ", struct fid_mr **" mr ");"
.PP
.HP
.BI "int fi_close(struct fid *" mr ");"
.PP
.HP
.BI "void * fi_mr_desc(struct fid_mr *" mr ");"
.HP
.BI "uint64_t fi_mr_key(struct fid_mr *" mr ");"
.PP
.HP
.BI "int fi_mr_bind(struct fid_mr *" mr ", struct fid *" ec ", "
.BI "uint64_t " flags ");"
.IP
.nf
\f[C]
#include\ <rdma/fi_domain.h>
int\ fi_mr_reg(struct\ fid_domain\ *domain,\ const\ void\ *buf,\ size_t\ len,
\ \ \ \ uint64_t\ access,\ uint64_t\ offset,\ uint64_t\ requested_key,
\ \ \ \ uint64_t\ flags,\ struct\ fid_mr\ **mr,\ void\ *context);
int\ fi_mr_regv(struct\ fid_domain\ *domain,\ const\ struct\ iovec\ *\ iov,
\ \ \ \ size_t\ count,\ uint64_t\ access,\ uint64_t\ offset,\ uint64_t\ requested_key,
\ \ \ \ uint64_t\ flags,\ struct\ fid_mr\ **mr,\ void\ *context);
int\ fi_mr_regattr(struct\ fid_domain\ *domain,\ const\ struct\ fi_mr_attr\ *attr,
\ \ \ \ uint64_t\ flags,\ struct\ fid_mr\ **mr);
int\ fi_close(struct\ fid\ *mr);
void\ *\ fi_mr_desc(struct\ fid_mr\ *mr);
uint64_t\ fi_mr_key(struct\ fid_mr\ *mr);
int\ fi_mr_bind(struct\ fid_mr\ *mr,\ struct\ fid\ *ec,\ uint64_t\ flags);
\f[]
.fi
.SH ARGUMENTS
.IP "domain"
Resource domain
.IP "mr"
Memory region
.IP "ec"
Event queue or counter
.IP "context"
User specified context associated with the memory region.
.IP "buf"
Memory buffer to register with the fabric hardware
.IP "len"
Length of memory buffer to register
.IP "iov"
Vectored memory buffer.
.IP "count"
Count of vectored buffer entries.
.IP "access"
Memory access permissions associated with registration
.IP "offset"
Optional specified offset for accessing specified registered buffers.
.IP "requested_key"
Optional requested remote key associated with registered buffers.
.IP "attr"
Memory region attributes
.IP "flags"
Additional flags to apply to the operation.
.SH "DESCRIPTION"
Registered memory regions associate memory buffers with permissions
granted for access by fabric resources. A memory buffer must be
registered with a resource domain before it can be used as the target
of a remote RMA or atomic data transfer. Additionally, a fabric
provider may require that data buffers be registered before being
used in local transfers.
.PP
A provider may hide local registration requirements from applications
by making use of an internal registration cache or similar mechanisms.
\f[I]domain\f[] : Resource domain
.PP
\f[I]mr\f[] : Memory region
.PP
\f[I]ec\f[] : Event queue or counter
.PP
\f[I]context\f[] : User specified context associated with the memory
region.
.PP
\f[I]buf\f[] : Memory buffer to register with the fabric hardware
.PP
\f[I]len\f[] : Length of memory buffer to register
.PP
\f[I]iov\f[] : Vectored memory buffer.
.PP
\f[I]count\f[] : Count of vectored buffer entries.
.PP
\f[I]access\f[] : Memory access permissions associated with registration
.PP
\f[I]offset\f[] : Optional specified offset for accessing specified
registered buffers.
This parameter is reserved for future use and must be 0.
.PP
\f[I]requested_key\f[] : Optional requested remote key associated with
registered buffers.
.PP
\f[I]attr\f[] : Memory region attributes
.PP
\f[I]flags\f[] : Additional flags to apply to the operation.
.SH DESCRIPTION
.PP
Registered memory regions associate memory buffers with permissions
granted for access by fabric resources.
A memory buffer must be registered with a resource domain before it can
be used as the target of a remote RMA or atomic data transfer.
Additionally, a fabric provider may require that data buffers be
registered before being used in local transfers.
.PP
A provider may hide local registration requirements from applications by
making use of an internal registration cache or similar mechanisms.
Such mechanisms, however, may negatively impact performance for some
applications, notably those which manage their own network buffers.
In order to support as broad range of applications as possible, without
unduly affecting their performance, applications that wish to manage
their own local memory registrations may do so by using the memory
registration calls. Applications may use the FI_LOCAL_MR domain
capability bit as a guide.
registration calls.
Applications may use the FI_LOCAL_MR domain capability bit as a guide.
.PP
Providers may support applications registering any range of addresses
in their virtual address space, whether or not those addresses are
backed by physical pages or have been allocated to the app. Support for
this ability is specified through the FI_DYNAMIC_MR capability flag.
Providers may support applications registering any range of addresses in
their virtual address space, whether or not those addresses are backed by
physical pages or have been allocated to the app.
Support for this ability is specified through the FI_DYNAMIC_MR
capability flag.
Providers that lack this capability require that registered memory
regions be backed by allocated memory pages.
.PP
The registrations functions -- fi_mr_reg, fi_mr_regv, and fi_mr_regattr --
are used to register one or more memory buffers with fabric resources.
The main difference between registration functions are the number
and type of parameters that they accept as input. Otherwise,
they perform the same general function.
The attributes of a registered memory region may be specified by either
the provider or, if supported, the application.
Relevant attributes include the MR key associated with the region and
the address (offset) used by peer applications when accessing the region
through RMA or atomic operations.
The FI_PROV_MR_ATTR mode bit indicates if the provider will supply these
attribute values, or if the application may select them.
Provider supplied values will require that an application exchange the
memory region attributes with peers if RMA is required.
.PP
By default, memory registration completes synchronously. I.e. the
registration call will not return until the registration has completed.
The registrations functions -- fi_mr_reg, fi_mr_regv, and fi_mr_regattr
-- are used to register one or more memory buffers with fabric
resources.
The main difference between registration functions are the number and
type of parameters that they accept as input.
Otherwise, they perform the same general function.
.PP
By default, memory registration completes synchronously.
I.e.
the registration call will not return until the registration has
completed.
Memory registration can complete asynchronous by binding the resource
domain to an event queue using the FI_REG_MR flag. See fi_domain_bind.
When memory registration is asynchronous, in order to avoid a race condition
between the registration call returning and the corresponding reading
of the event from the EQ, the mr output parameter will
be written before any event associated with the operation may be read by
the application. An asynchronous event will not be generated unless the
registration call returns success (0).
.SS "fi_mr_reg"
The fi_mr_reg call registers the user-specified memory buffer with
the resource domain. The buffer is enabled for access by the fabric
hardware based on the provided access permissions. Supported access
permissions are the bitwise OR of the following:
.IP "FI_SEND"
The memory buffer may be used in outgoing message data transfers. This
includes fi_msg and fi_tagged operations.
.IP "FI_RECV"
The memory buffer may be used to receive inbound message transfers.
domain to an event queue using the FI_REG_MR flag.
See fi_domain_bind.
When memory registration is asynchronous, in order to avoid a race
condition between the registration call returning and the corresponding
reading of the event from the EQ, the mr output parameter will be
written before any event associated with the operation may be read by
the application.
An asynchronous event will not be generated unless the registration call
returns success (0).
.SS fi_mr_reg
.PP
The fi_mr_reg call registers the user-specified memory buffer with the
resource domain.
The buffer is enabled for access by the fabric hardware based on the
provided access permissions.
Supported access permissions are the bitwise OR of the following:
.PP
\f[I]FI_SEND\f[] : The memory buffer may be used in outgoing message
data transfers.
This includes fi_msg and fi_tagged operations.
.IP "FI_READ"
The memory buffer may be used as the result buffer for RMA read
and atomic operations on the initiator side.
.IP "FI_WRITE"
The memory buffer may be used as the source buffer for RMA write
and atomic operations on the initiator side.
.IP "FI_REMOTE_READ"
The memory buffer may be used as the source buffer of an RMA read
operation on the target side.
.IP "FI_REMOTE_WRITE"
The memory buffer may be used as the target buffer of an RMA write
or atomic operation.
.PP
\f[I]FI_RECV\f[] : The memory buffer may be used to receive inbound
message transfers.
This includes fi_msg and fi_tagged operations.
.PP
\f[I]FI_READ\f[] : The memory buffer may be used as the result buffer
for RMA read and atomic operations on the initiator side.
.PP
\f[I]FI_WRITE\f[] : The memory buffer may be used as the source buffer
for RMA write and atomic operations on the initiator side.
.PP
\f[I]FI_REMOTE_READ\f[] : The memory buffer may be used as the source
buffer of an RMA read operation on the target side.
.PP
\f[I]FI_REMOTE_WRITE\f[] : The memory buffer may be used as the target
buffer of an RMA write or atomic operation.
.PP
Registered memory is associated with a local memory descriptor and,
optionally, a remote memory key. A memory descriptor is a provider
specific identifier associated with registered memory. Memory descriptors
often map to hardware specific indices or keys associated with the
memory region. Remote memory keys provide limited protection against
unwanted access by a remote node. Remote accesses to a memory region
must provide the key associated with the registration.
optionally, a remote memory key.
A memory descriptor is a provider specific identifier associated with
registered memory.
Memory descriptors often map to hardware specific indices or keys
associated with the memory region.
Remote memory keys provide limited protection against unwanted access by
a remote node.
Remote accesses to a memory region must provide the key associated with
the registration.
.PP
Because MR keys must be provided by a remote process, an application
can use the requested_key parameter to indicate that a specific key
value be returned. Support for user requested keys is provider specific
and is determined by the FI_PROV_MR_KEY mode bit.
Access domains must be opened with the FI_PROV_MR_KEY mode cleared
in order to enable support for application selectable MR keys.
Because MR keys must be provided by a remote process, an application can
use the requested_key parameter to indicate that a specific key value be
returned.
Support for user requested keys is provider specific and is determined
by the FI_PROV_MR_ATTR mode bit.
Access domains must be opened with the FI_PROV_MR_ATTR mode cleared in
order to enable support for application selectable MR keys.
.PP
Remote RMA and atomic operations indicate the location within a registered
memory region by specifying an address. By default, the RMA target address
is a virtual address between the registered buf address and the end of the
registered memory region (buf + len). Typically, the starting virtual
address and length are provided to the RMA initiator, either during connection
setup or through separate communication messages.
Remote RMA and atomic operations indicate the location within a
registered memory region by specifying an address.
By default, the RMA target address is a 0-based offset between the
registered buf address and the end of the registered memory region (buf
+ len), unless the FI_PROV_MR_ATTR mode bit has been set.
If the FI_PROV_MR_ATTR mode bit is enabled, the RMA target address
defaults to the starting virtual address of buf.
.PP
In order to reduce the amount of state information an application must
maintain regarding target RMA buffers, the memory registration calls allow
a buffer to be associated with a specific target offset. This offset is
used on the initiator side in lieu of the virtual address. To associate
a memory region with a specific offset, the FI_MR_OFFSET flag must be
used when registering the memory.
The offset parameter is reserved for future use and must be 0.
.PP
For asynchronous memory registration requests, the result will be
reported to the user through an event queue associated with the resource
domain.
If successful, the allocated memory region structure will be returned to
the user through the mr parameter.
The mr address must remain valid until the registration operation
completes.
The context specified with the registration request is returned with the
completion event.
.SS fi_mr_regv
.PP
For asynchronous memory registration requests, the result
will be reported to the user through an event
queue associated with the resource domain. If successful, the allocated
memory region structure will be returned to the user through the mr
parameter. The mr address must remain valid until the registration
operation completes. The context specified with the registration request
is returned with the completion event.
.SS "fi_mr_regv"
The fi_mr_regv call adds support for a scatter-gather list to fi_mr_reg.
Multiple memory buffers are registered as a single memory region.
Otherwise, the operation is the same.
.SS "fi_mr_regattr"
Otherwise, the operation is the same.
.SS fi_mr_regattr
.PP
The fi_mr_regattr call is a more generic, extensible registration call
that allows the user to specify the registration request using a struct
fi_mr_attr.
.PP
.IP
.nf
struct fi_mr_attr {
const struct iovec *mr_iov; /* scatter-gather array */
size_t iov_count; /* # elements in mr_iov */
uint64_t access; /* access permission flags */
uint64_t requested_key; /* requested remote key */
void *context; /* user-defined context */
\f[C]
struct\ fi_mr_attr\ {
\ \ \ \ const\ struct\ iovec\ *mr_iov;\ \ \ \ \ \ \ /*\ scatter-gather\ array\ */
\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ \ iov_count;\ \ \ \ \ /*\ #\ elements\ in\ mr_iov\ */
\ \ \ \ uint64_t\ \ \ \ \ \ \ \ \ \ \ access;\ \ \ \ \ \ \ \ /*\ access\ permission\ flags\ */
\ \ \ \ uint64_t\ \ \ \ \ \ \ \ \ \ \ requested_key;\ /*\ requested\ remote\ key\ */
\ \ \ \ void\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ *context;\ \ \ \ \ \ /*\ user-defined\ context\ */
};
\f[]
.fi
.SS "fi_close"
.SS fi_close
.PP
Fi_close may be used to release all resources associated with
registering a memory region. Once unregistered, further access to the
registered memory is not guaranteed. For performance reasons,
unregistration processing may be done asynchronously or lazily. To force
all queued unregistration requests to complete, applications may call
fi_sync on the domain. Upon completion of a domain fi_sync call, all memory
regions unregistered before fi_sync was invoked will have completed,
and no further access to the registered region, either locally or remotely,
via fabric resources will be possible.
.SS "fi_mr_desc / fi_mr_key"
registering a memory region.
Once unregistered, further access to the registered memory is not
guaranteed.
For performance reasons, unregistration processing may be done
asynchronously or lazily.
To force all queued unregistration requests to complete, applications
may call fi_sync on the domain.
Upon completion of a domain fi_sync call, all memory regions
unregistered before fi_sync was invoked will have completed, and no
further access to the registered region, either locally or remotely, via
fabric resources will be possible.
.SS fi_mr_desc / fi_mr_key
.PP
The local memory descriptor and remote protection key associated with a
MR may be obtained by calling fi_mr_desc and fi_mr_key, respectively.
The memory registration must have completed successfully before invoking
these calls.
.SS "fi_mr_bind"
.SS fi_mr_bind
.PP
The fi_mr_bind function associates a memory region with an event counter
or queue, for providers that support the generation of events based on
fabric operations. The type of events tracked against the memory region is
based on the bitwise OR of the following flags.
.IP "FI_WRITE"
Generates an event whenever a remote RMA write or atomic operation modifies
the memory region.
.SH "FLAGS"
The following flags are usable with fi_mr_reg, fi_mr_regv, fi_mr_regattr.
.IP "FI_MR_KEY"
Indicates that the registered memory region should be associated with the
specified requested_key. If this flag is not provided, the requested_key
parameter is ignored.
.IP "FI_MR_OFFSET"
Associates the registered memory region with the specified offset as its
base target address. If this flag is not provided, the offset parameter
is ignored. When set, any overlapping registration is replaced.
.SH "RETURN VALUES"
Returns 0 on success. On error, a negative value corresponding to
fabric errno is returned.
fabric operations.
The type of events tracked against the memory region is based on the
bitwise OR of the following flags.
.PP
Fabric errno values are defined in
.IR "rdma/fi_errno.h".
.SH "ERRORS"
.IP "-FI_ENOKEY"
The requested_key is already in use.
.IP "-FI_EKEYREJECTED"
The requested_key is not available. The key may be out of the range
supported by the provider, or the provider may not support user-requested
memory registration keys.
.IP "-FI_ENOSYS"
Returned by fi_mr_bind if the provider does not support reporting events
based on access to registered memory regions.
.IP "-FI_EBADFLAGS"
Returned if the specified flags are not supported by the provider.
.SH "SEE ALSO"
fi_getinfo(3), fi_endpoint(3), fi_domain(3), fi_rma(3), fi_msg(3), fi_atomic(3)
\f[I]FI_WRITE\f[] : Generates an event whenever a remote RMA write or
atomic operation modifies the memory region.
.SH FLAGS
.PP
The following flags are usable with fi_mr_reg, fi_mr_regv,
fi_mr_regattr.
.PP
\f[I]FI_MR_KEY\f[] : Indicates that the registered memory region should
be associated with the specified requested_key.
If this flag is not provided, the requested_key parameter is ignored.
It is an error to specify this flag on domains with the FI_PROV_MR_ATTR
mode bit set.
.PP
\f[I]FI_MR_OFFSET\f[] : Associates the registered memory region with the
specified offset as its base target address.
If this flag is not provided, the offset parameter is ignored.
When set, any overlapping registration is replaced.
.SH RETURN VALUES
.PP
Returns 0 on success.
On error, a negative value corresponding to fabric errno is returned.
.PP
Fabric errno values are defined in \f[C]rdma/fi_errno.h\f[].
.SH ERRORS
.PP
\f[I]-FI_ENOKEY\f[] : The requested_key is already in use.
.PP
\f[I]-FI_EKEYREJECTED\f[] : The requested_key is not available.
The key may be out of the range supported by the provider, or the
provider may not support user-requested memory registration keys.
.PP
\f[I]-FI_ENOSYS\f[] : Returned by fi_mr_bind if the provider does not
support reporting events based on access to registered memory regions.
.PP
\f[I]-FI_EBADFLAGS\f[] : Returned if the specified flags are not
supported by the provider.
.SH SEE ALSO
.PP
\f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3),
\f[C]fi_rma\f[](3), \f[C]fi_msg\f[](3), \f[C]fi_atomic\f[](3)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -1,218 +1,246 @@
.TH "FI_MSG" 3 "2014-11-11" "Libfabric v0.0.2" "Libfabric Programmer's Manual" libfabric
.TH fi_msg 3 "2014\-12\-02" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_msg - Message data transfer operations
.PP
fi_recv / fi_recvv
.br
fi_recvfrom / fi_recvmsg
.RS
.TP
.B fi_recv / fi_recvv / fi_recvmsg
Post a buffer to receive an incoming message
.RE
.PP
fi_send / fi_sendv
.br
fi_sendto / fi_sendmsg
.br
fi_inject / fi_injectto
.br
fi_senddata / fi_senddatato
.RS
Initiate an operation to send a message
.RE
.PP
fi_send / fi_sendv / fi_sendmsg
.PD 0
.P
.PD
fi_inject / fi_senddata : Initiate an operation to send a message
.SH SYNOPSIS
.B #include <rdma/fi_endpoint.h>
.HP
.BI "ssize_t fi_recv(struct fid_ep *" ep ", void * " buf ", size_t " len ","
.BI "void *" desc ", void * " context ");"
.HP
.BI "ssize_t fi_recvv(struct fid_ep *" ep ", const struct iovec * " iov ", void *" desc ","
.BI "size_t " count ", void * " context ");"
.HP
.BI "ssize_t fi_recvfrom(struct fid_ep *" ep ", void * " buf ", size_t " len ","
.BI "void *" desc ", fi_addr_t " src_addr ", void * " context ");"
.HP
.BI "ssize_t fi_recvmsg(struct fid_ep *" ep ", const struct fi_msg * " msg ", uint64_t " flags ");"
.PP
.HP
.BI "ssize_t fi_send(struct fid_ep *" ep ", void * " buf ", size_t " len ","
.BI "void *" desc ", void * " context ");"
.HP
.BI "ssize_t fi_sendv(struct fid_ep *" ep ", const void * " iov ", void *" desc ","
.BI "size_t " count ", void * " context ");"
.HP
.BI "ssize_t fi_sendto(struct fid_ep *" ep ", void * " buf ", size_t " len ","
.BI "void *" desc ", fi_addr_t " dest_addr ", void * " context ");"
.HP
.BI "ssize_t fi_sendmsg(struct fid_ep *" ep ", const struct fi_msg * " msg ", uint64_t " flags ");"
.HP
.BI "ssize_t fi_inject(struct fid_ep *" ep ", void * " buf ", size_t " len ");"
.HP
.BI "ssize_t fi_injectto(struct fid_ep *" ep ", void * " buf ", size_t " len ","
.BI "fi_addr_t " dest_addr ");"
.HP
.BI "ssize_t fi_senddata(struct fid_ep *" ep ", void * " buf ", size_t " len ","
.BI "void *" desc ", uint64_t " data ", void * " context ");"
.HP
.BI "ssize_t fi_senddatato(struct fid_ep *" ep ", void * " buf ", size_t " len ","
.BI "void *" desc ", uint64_t " data ", fi_addr_t " dest_addr ", void * " context ");"
.IP
.nf
\f[C]
#include\ <rdma/fi_endpoint.h>
ssize_t\ fi_recv(struct\ fid_ep\ *ep,\ void\ *\ buf,\ size_t\ len,
\ \ \ \ void\ *desc,\ fi_addr_t\ src_addr,\ void\ *context);
ssize_t\ fi_recvv(struct\ fid_ep\ *ep,\ const\ struct\ iovec\ *iov,\ void\ *desc,
\ \ \ \ size_t\ count,\ fi_addr_t\ src_addr,\ void\ *context);
ssize_t\ fi_recvmsg(struct\ fid_ep\ *ep,\ const\ struct\ fi_msg\ *msg,
\ \ \ \ uint64_t\ flags);
ssize_t\ fi_send(struct\ fid_ep\ *ep,\ void\ *buf,\ size_t\ len,
\ \ \ \ void\ *desc,\ fi_addr_t\ dest_addr,\ void\ *context);
ssize_t\ fi_sendv(struct\ fid_ep\ *ep,\ const\ void\ *iov,\ void\ *desc,
\ \ \ \ size_t\ count,\ fi_addr_t\ dest_addr,\ void\ *context);
ssize_t\ fi_sendmsg(struct\ fid_ep\ *ep,\ const\ struct\ fi_msg\ *msg,
\ \ \ \ uint64_t\ flags);
ssize_t\ fi_inject(struct\ fid_ep\ *ep,\ void\ *buf,\ size_t\ len,
\ \ \ \ fi_addr_t\ dest_addr);
ssize_t\ fi_senddata(struct\ fid_ep\ *ep,\ void\ *buf,\ size_t\ len,
\ \ \ \ void\ *desc,\ uint64_t\ data,\ fi_addr_t\ dest_addr,\ void\ *context);
\f[]
.fi
.SH ARGUMENTS
.IP "ep"
Fabric endpoint on which to initiate send or post receive buffer.
.IP "buf"
Data buffer to send or receive.
.IP "len"
Length of data buffer to send or receive, specified in bytes. Valid transfers
are from 0 bytes up to the endpoint's max_msg_size.
.IP "iov"
Vectored data buffer.
.IP "count"
Count of vectored data entries.
.IP "desc"
Descriptor associated with the data buffer
.IP "data"
Remote CQ data to transfer with the sent message.
.IP "dest_addr"
Destination address for connectionless transfers
.IP "src_addr"
Source address to receive from for connectionless transfers
.IP "msg"
Message descriptor for send and receive operations.
.IP "flags"
Additional flags to apply for the send or receive operation.
.IP "context"
User specified pointer to associate with the operation.
.SH "DESCRIPTION"
The send functions -- fi_send, fi_sendv, fi_sendto, fi_sendmsg,
fi_inject, fi_injectto, fi_senddata, and fi_senddatato -- are used
to transmit a message from one
endpoint to another endpoint. The main difference between send functions
are the number and type of parameters that they accept as input. Otherwise,
they perform the same general function. Messages sent using fi_msg operations
are received by a remote endpoint into a buffer posted to receive such messages.
.PP
The receive functions -- fi_recv, fi_recvv, fi_recvfrom,
fi_recvmsg -- post a data buffer to an endpoint to receive
inbound messages. Similar to the send operations, receive operations operate
asynchronously. Users should not touch the posted data buffer(s) until the
receive operation has completed.
\f[I]ep\f[] : Fabric endpoint on which to initiate send or post receive
buffer.
.PP
\f[I]buf\f[] : Data buffer to send or receive.
.PP
\f[I]len\f[] : Length of data buffer to send or receive, specified in
bytes.
Valid transfers are from 0 bytes up to the endpoint\[aq]s max_msg_size.
.PP
\f[I]iov\f[] : Vectored data buffer.
.PP
\f[I]count\f[] : Count of vectored data entries.
.PP
\f[I]desc\f[] : Descriptor associated with the data buffer
.PP
\f[I]data\f[] : Remote CQ data to transfer with the sent message.
.PP
\f[I]dest_addr\f[] : Destination address for connectionless transfers.
Ignored for connected endpoints.
.PP
\f[I]src_addr\f[] : Source address to receive from for connectionless
transfers.
Ignored for connected endpoints.
.PP
\f[I]msg\f[] : Message descriptor for send and receive operations.
.PP
\f[I]flags\f[] : Additional flags to apply for the send or receive
operation.
.PP
\f[I]context\f[] : User specified pointer to associate with the
operation.
.SH DESCRIPTION
.PP
The send functions -- fi_send, fi_sendv, fi_sendmsg, fi_inject, and
fi_senddata -- are used to transmit a message from one endpoint to
another endpoint.
The main difference between send functions are the number and type of
parameters that they accept as input.
Otherwise, they perform the same general function.
Messages sent using fi_msg operations are received by a remote endpoint
into a buffer posted to receive such messages.
.PP
The receive functions -- fi_recv, fi_recvv, fi_recvmsg -- post a data
buffer to an endpoint to receive inbound messages.
Similar to the send operations, receive operations operate
asynchronously.
Users should not touch the posted data buffer(s) until the receive
operation has completed.
.PP
Completed message operations are reported to the user through one or
more event collectors associated with the endpoint.
Users provide context which are associated with each operation, and is
returned to the user as part of the event completion.
See fi_eq for completion event details.
.SS fi_send
.PP
Completed message operations are reported to the user through one or more event
collectors associated with the endpoint. Users provide context which are
associated with each operation, and is returned to the user
as part of the event completion. See fi_eq for completion event details.
.SS "fi_send"
The call fi_send transfers the data contained in the user-specified data
buffer to a remote endpoint, with message boundaries being maintained.
The local endpoint must be connected to a remote endpoint or destination
before fi_send is called. Unless the endpoint has been configured differently,
the data buffer passed into fi_send must not be touched by the application
until the fi_send call completes asynchronously.
.SS "fi_sendv"
The fi_sendv call adds support for a scatter-gather list to fi_send and/or
fi_sendmem. The fi_sendv transfers the set of data buffers referenced by
the iov parameter to a remote endpoint as a single message. The format of
iov parameter is specified by the user when the endpoint is created. See
fi_getinfo for more details on iov formats.
.SS "fi_sendto"
The fi_sendto function is equivalent to fi_send for unconnected endpoints.
.SS "fi_sendmsg"
The fi_sendmsg call supports data transfers over both connected and unconnected
endpoints, with the ability to control the send operation per call through the
use of flags. The fi_sendmsg function takes a struct fi_msg as input.
before fi_send is called.
Unless the endpoint has been configured differently, the data buffer
passed into fi_send must not be touched by the application until the
fi_send call completes asynchronously.
.SS fi_sendv
.PP
The fi_sendv call adds support for a scatter-gather list to fi_send.
The fi_sendv transfers the set of data buffers referenced by the iov
parameter to a remote endpoint as a single message.
.SS fi_sendmsg
.PP
The fi_sendmsg call supports data transfers over both connected and
unconnected endpoints, with the ability to control the send operation
per call through the use of flags.
The fi_sendmsg function takes a \f[C]struct\ fi_msg\f[] as input.
.IP
.nf
struct fi_msg {
const struct iovec *msg_iov;/* scatter-gather array */
void **desc; /* local request descriptor */
size_t iov_count;/* # elements in iov */
const void *addr; /* optional endpoint address */
void *context;/* user-defined context */
uint64_t data; /* optional message data */
\f[C]
struct\ fi_msg\ {
\ \ \ \ const\ struct\ iovec\ *msg_iov;/*\ scatter-gather\ array\ */
\ \ \ \ void\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ **desc;\ \ /*\ local\ request\ descriptor\ */
\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ \ iov_count;/*\ #\ elements\ in\ iov\ */
\ \ \ \ const\ void\ \ \ \ \ \ \ \ \ *addr;\ \ \ /*\ optional\ endpoint\ address\ */
\ \ \ \ void\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ *context;/*\ user-defined\ context\ */
\ \ \ \ uint64_t\ \ \ \ \ \ \ \ \ \ \ data;\ \ \ \ /*\ optional\ message\ data\ */
};
\f[]
.fi
.SS "fi_inject"
The send inject call is an optimized version of fi_send. The fi_inject
function behaves as if the FI_INJECT transfer flag were set, and
FI_EVENT were not. That is, the data buffer is available for reuse
immediately on returning from fi_inject, and no completion event will
be generated for this send. The completion event will be suppressed even if
the endpoint has not been configured with FI_EVENT. See the flags
discussion below for more details.
.SS "fi_injectto"
This call is similar to fi_inject, but for unconnected endpoints.
.SS "fi_senddata"
.SS fi_inject
.PP
The send inject call is an optimized version of fi_send.
The fi_inject function behaves as if the FI_INJECT transfer flag were
set, and FI_COMPLETION were not.
That is, the data buffer is available for reuse immediately on returning
from fi_inject, and no completion event will be generated for this
send.
The completion event will be suppressed even if the endpoint has not
been configured with FI_COMPLETION.
See the flags discussion below for more details.
.SS fi_senddata
.PP
The send data call is similar to fi_send, but allows for the sending of
remote CQ data (see FI_REMOTE_CQ_DATA flag) as part of the transfer.
.SS "fi_senddatato"
This call is similar to fi_senddata, but for unconnected endpoints.
.SS "fi_recv"
The fi_recv call posts a data buffer to the receive queue of the corresponding
endpoint. Posted receives are matched with inbound sends in the order in which
they were posted. Message boundaries are maintained. The order
in which the receives complete is dependent on the endpoint type and protocol.
.SS "fi_recvfrom"
The fi_recvfrom call is equivalent to fi_recv
for unconnected endpoints. These calls are used to indicate
that a buffer should be posted to receive incoming data from a specific
remote endpoint.
.SS "fi_recvmsg"
The fi_recvmsg call supports posting buffers over both connected and unconnected
endpoints, with the ability to control the receive operation per call through the
use of flags. The fi_recvmsg function takes a struct fi_msg as input.
.SH "FLAGS"
.SS fi_recv
.PP
The fi_recv call posts a data buffer to the receive queue of the
corresponding endpoint.
Posted receives are searched in the order in which they were posted in
order to match sends.
Message boundaries are maintained.
The order in which the receives complete is dependent on the endpoint
type and protocol.
For unconnected endpoints, the src_addr parameter can be used to
indicate that a buffer should be posted to receive incoming data from a
specific remote endpoint.
.SS fi_recvv
.PP
The fi_recvv call adds support for a scatter-gather list to fi_recv.
The fi_recvv posts the set of data buffers referenced by the iov
parameter to receive incoming data.
.SS fi_recvmsg
.PP
The fi_recvmsg call supports posting buffers over both connected and
unconnected endpoints, with the ability to control the receive operation
per call through the use of flags.
The fi_recvmsg function takes a struct fi_msg as input.
.SH FLAGS
.PP
The fi_recvmsg and fi_sendmsg calls allow the user to specify flags
which can change the default message handling of the endpoint.
Flags specified with fi_recvmsg / fi_sendmsg override most flags
previously configured with the endpoint, except where noted (see fi_endpoint).
The following list of flags are usable with fi_recvmsg and/or fi_sendmsg.
.IP "FI_REMOTE_CQ_DATA"
Applies to fi_sendmsg, fi_senddata, and fi_senddatato. Indicates that remote
CQ data is available and should be sent as part of the request. See fi_getinfo
for additional details on FI_REMOTE_CQ_DATA.
.IP "FI_EVENT"
Indicates that a completion entry should be generated for the specified
operation. The endpoint must be bound to an event queue
with FI_EVENT that corresponds to the specified operation, or this flag
is ignored.
.IP "FI_MORE"
Indicates that the user has additional requests that will immediately be
posted after the current call returns. Use of this flag may improve
performance by enabling the provider to optimize its access to the fabric
hardware.
.IP "FI_REMOTE_SIGNAL"
Indicates that a completion event at the target process should be generated
for the given operation. The remote endpoint must be configured with
FI_REMOTE_SIGNAL, or this flag will be ignored by the target.
.IP "FI_INJECT"
Applies to fi_sendmsg. Indicates that the outbound data buffer should be
returned to user immediately after the send call returns, even if the operation
is handled asynchronously. This may require that the underlying provider
implementation copy the data into a local buffer and transfer out of that
buffer.
.IP "FI_MULTI_RECV"
Applies to posted receive operations. This flag allows the user to post a
single buffer that will receive multiple incoming messages. Received
messages will be packed into the receive buffer until the buffer has been
consumed. Use of this flag may cause a single posted receive operation
to generate multiple events as messages are placed into the buffer.
previously configured with the endpoint, except where noted (see
fi_endpoint).
The following list of flags are usable with fi_recvmsg and/or
fi_sendmsg.
.PP
\f[I]FI_REMOTE_CQ_DATA\f[] : Applies to fi_sendmsg and fi_senddata.
Indicates that remote CQ data is available and should be sent as part of
the request.
See fi_getinfo for additional details on FI_REMOTE_CQ_DATA.
.PP
\f[I]FI_COMPLETION\f[] : Indicates that a completion entry should be
generated for the specified operation.
The endpoint must be bound to an event queue with FI_COMPLETION that
corresponds to the specified operation, or this flag is ignored.
.PP
\f[I]FI_MORE\f[] : Indicates that the user has additional requests that
will immediately be posted after the current call returns.
Use of this flag may improve performance by enabling the provider to
optimize its access to the fabric hardware.
.PP
\f[I]FI_REMOTE_SIGNAL\f[] : Indicates that a completion event at the
target process should be generated for the given operation.
The remote endpoint must be configured with FI_REMOTE_SIGNAL, or this
flag will be ignored by the target.
.PP
\f[I]FI_INJECT\f[] : Applies to fi_sendmsg.
Indicates that the outbound data buffer should be returned to user
immediately after the send call returns, even if the operation is
handled asynchronously.
This may require that the underlying provider implementation copy the
data into a local buffer and transfer out of that buffer.
.PP
\f[I]FI_MULTI_RECV\f[] : Applies to posted receive operations.
This flag allows the user to post a single buffer that will receive
multiple incoming messages.
Received messages will be packed into the receive buffer until the
buffer has been consumed.
Use of this flag may cause a single posted receive operation to generate
multiple events as messages are placed into the buffer.
The placement of received data into the buffer may be subjected to
provider specific alignment restrictions. The buffer will be freed from
the endpoint when the available buffer space falls below the network's
MTU size (see FI_OPT_MIN_MULTI_RECV).
.IP "FI_REMOTE_COMPLETE"
Applies to fi_sendmsg. Indicates that a completion should not be generated
until the operation has completed on the remote side.
.SH "RETURN VALUE"
Returns 0 on success. On error, a negative value corresponding to fabric
errno is returned. Fabric errno values are defined in
.IR "rdma/fi_errno.h".
.SH "ERRORS"
.IP "-FI_EAGAIN"
Indicates that the underlying provider currently lacks the resources needed
to initiate the requested operation. This may be the result of insufficient
internal buffering, in the case of FI_SEND_BUFFERED, or processing queues
are full. The operation may be retried after additional provider resources
become available, usually through the completion of currently outstanding
provider specific alignment restrictions.
The buffer will be freed from the endpoint when the available buffer
space falls below the network\[aq]s MTU size (see
FI_OPT_MIN_MULTI_RECV).
.PP
\f[I]FI_REMOTE_COMPLETE\f[] : Applies to fi_sendmsg.
Indicates that a completion should not be generated until the operation
has completed on the remote side.
.SH RETURN VALUE
.PP
Returns 0 on success.
On error, a negative value corresponding to fabric errno is returned.
Fabric errno values are defined in \f[C]rdma/fi_errno.h\f[].
.SH ERRORS
.PP
\f[I]-FI_EAGAIN\f[] : Indicates that the underlying provider currently
lacks the resources needed to initiate the requested operation.
This may be the result of insufficient internal buffering, in the case
of FI_INJECT, or processing queues are full.
The operation may be retried after additional provider resources become
available, usually through the completion of currently outstanding
operations.
.SH "SEE ALSO"
fi_getinfo(3), fi_endpoint(3), fi_domain(3), fi_eq(3)
.SH SEE ALSO
.PP
\f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3),
\f[C]fi_eq\f[](3)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -1,200 +1,224 @@
.TH "FI_POLL" 3 "2014-11-11" "Libfabric v0.0.2" "Libfabric Programmer's Manual" libfabric
.TH fi_poll 3 "2014-11-14" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
fi_poll \- Polling and wait set operations
.PP
fi_poll_open / fi_close
.RS
Open/close a polling set
.RE
fi_poll - Polling and wait set operations
.PP
fi_poll_add / fi_poll_del
.RS
Add/remove an event queue or counter to/from a poll set.
.RE
fi_poll_open / fi_close : Open/close a polling set
.PP
fi_poll
.RS
Poll for progress and events across multiple event queues.
.RE
fi_poll_add / fi_poll_del : Add/remove an event queue or counter to/from
a poll set.
.PP
fi_wait_open / fi_close
.RS
Open/close a wait set
.RE
fi_poll : Poll for progress and events across multiple event queues.
.PP
fi_wait
.RS
Waits for one or more wait objects in a set to be signaled.
.RE
fi_wait_open / fi_close : Open/close a wait set
.PP
fi_wait : Waits for one or more wait objects in a set to be signaled.
.SH SYNOPSIS
.B #include <rdma/fi_domain.h>
.HP
.BI "int fi_poll_open(struct fid_domain *" domain ", struct fi_poll_attr *" attr ", "
.BI "struct fid_poll **" pollset ");"
.HP
.BI "int fi_close(struct fid *" pollset ");"
.HP
.BI "int fi_poll_add(struct fid_poll *" pollset ", struct fid *" event_fid ", "
.BI "uint64_t " flags ");"
.HP
.BI "int fi_poll_del(struct fid_poll *" pollset ", struct fid *" event_fid ", "
.BI "uint64_t " flags ");"
.HP
.BI "int fi_poll(struct fid_poll *" pollset ", void **" context ", "
.BI "int " count ");"
.HP
.BI "int fi_wait_open(struct fid_domain *" domain ", struct fi_wait_attr *" attr ", "
.BI "struct fid_wait **" waitset ");"
.HP
.BI "int fi_close(struct fid *" waitset ");"
.HP
.BI "int fi_control(struct fid *" waitset ", int " command ", "
.BI "void *" arg ");"
.HP
.BI "int fi_wait(struct fid_wait *" waitset ", int " timeout ");"
.IP
.nf
\f[C]
#include\ <rdma/fi_domain.h>
int\ fi_poll_open(struct\ fid_domain\ *domain,\ struct\ fi_poll_attr\ *attr,
\ \ \ \ struct\ fid_poll\ **pollset);
int\ fi_close(struct\ fid\ *pollset);
int\ fi_poll_add(struct\ fid_poll\ *pollset,\ struct\ fid\ *event_fid,
\ \ \ \ uint64_t\ flags);
int\ fi_poll_del(struct\ fid_poll\ *pollset,\ struct\ fid\ *event_fid,
\ \ \ \ uint64_t\ flags);
int\ fi_poll(struct\ fid_poll\ *pollset,\ void\ **context,\ int\ count);
int\ fi_wait_open(struct\ fid_domain\ *domain,\ struct\ fi_wait_attr\ *attr,
\ \ \ \ struct\ fid_wait\ **waitset);
int\ fi_close(struct\ fid\ *waitset);
int\ fi_control(struct\ fid\ *waitset,\ int\ command,\ void\ *arg);
int\ fi_wait(struct\ fid_wait\ *waitset,\ int\ timeout);
\f[]
.fi
.SH ARGUMENTS
.IP "domain"
Resource domain
.IP "pollset"
Event poll set
.IP "waitset"
Wait object set
.IP "attr"
Poll or wait set attributes
.IP "context"
On success, an array of user context values associated with an event
queue or counter.
.IP "count"
Number of entries in context array.
.IP "timeout"
Time to wait for a signal, in milliseconds.
.SH "DESCRIPTION"
.SS "fi_poll_open"
fi_poll_open creates a new polling set. A poll set enables an optimized
method for progressing asynchronous operations across multiple event
queues and counters and checking for their completions.
.PP
\f[I]domain\f[] : Resource domain
.PP
\f[I]pollset\f[] : Event poll set
.PP
\f[I]waitset\f[] : Wait object set
.PP
\f[I]attr\f[] : Poll or wait set attributes
.PP
\f[I]context\f[] : On success, an array of user context values
associated with an event queue or counter.
.PP
\f[I]count\f[] : Number of entries in context array.
.PP
\f[I]timeout\f[] : Time to wait for a signal, in milliseconds.
.SH DESCRIPTION
.SS fi_poll_open
.PP
fi_poll_open creates a new polling set.
A poll set enables an optimized method for progressing asynchronous
operations across multiple event queues and counters and checking for
their completions.
.PP
A poll set is defined with the following attributes.
.PP
.IP
.nf
struct fi_poll_attr {
int mask; /* valid attr fields */
uint64_t flags; /* operation flags */
\f[C]
struct\ fi_poll_attr\ {
\ \ \ \ int\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ mask;\ \ \ \ \ \ /*\ valid\ attr\ fields\ */
\ \ \ \ uint64_t\ \ \ \ \ \ \ \ \ \ \ \ \ flags;\ \ \ \ \ /*\ operation\ flags\ */
};
\f[]
.fi
.IP "mask"
The mask field is used for forward and backward API compatibility. It is
used by the application to indicate which fields in the attribute structure
have been set. For this version of the API, mask should be set to
FI_POLL_ATTR_MASK_V1, indicating that all specified fields have been initialized.
.IP "flags"
Flags that set the default operation of the poll set. The use of this field
is reserved and must be set to 0 by the caller.
.SS "fi_close"
.PP
\f[I]mask\f[] : The mask field is used for forward and backward API
compatibility.
It is used by the application to indicate which fields in the attribute
structure have been set.
For this version of the API, mask should be set to FI_POLL_ATTR_MASK_V1,
indicating that all specified fields have been initialized.
.PP
\f[I]flags\f[] : Flags that set the default operation of the poll set.
The use of this field is reserved and must be set to 0 by the caller.
.SS fi_close
.PP
The fi_close call releases all resources associated with a poll set.
The poll set must not be associated with any other resources prior to
being closed.
.SS "fi_poll_add"
.SS fi_poll_add
.PP
Associates an event queue or counter with a poll set.
.SS "fi_poll_del"
.SS fi_poll_del
.PP
Removes an event queue or counter from a poll set.
.SS "fi_poll"
.SS fi_poll
.PP
Progresses all event queues and counters associated with a poll set and
checks for events. If events have occurred, contexts associated with
the event queues and/or counters are returned. The number of contexts is
limited to the size of the context array, indicated by the count parameter.
.SS "fi_wait_open"
fi_wait_open allocates a new wait set. A wait set enables an optimized method
of waiting for events across multiple event queues and counters. Where
possible, a wait set uses a single underlying wait object that is signaled
when a specified condition occurs on an associated event queue or counter.
checks for events.
If events have occurred, contexts associated with the event queues and/or
counters are returned.
The number of contexts is limited to the size of the context array,
indicated by the count parameter.
.SS fi_wait_open
.PP
The properties and behavior of a wait set are defined by struct fi_wait_attr.
fi_wait_open allocates a new wait set.
A wait set enables an optimized method of waiting for events across
multiple event queues and counters.
Where possible, a wait set uses a single underlying wait object that is
signaled when a specified condition occurs on an associated event queue
or counter.
.PP
The properties and behavior of a wait set are defined by struct
fi_wait_attr.
.IP
.nf
struct fi_wait_attr {
int mask; /* valid attr fields */
enum fi_wait_obj wait_obj; /* requested wait object */
uint64_t flags; /* operation flags */
\f[C]
struct\ fi_wait_attr\ {
\ \ \ \ int\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ mask;\ \ \ \ \ \ /*\ valid\ attr\ fields\ */
\ \ \ \ enum\ fi_wait_obj\ \ \ \ \ wait_obj;\ \ /*\ requested\ wait\ object\ */
\ \ \ \ uint64_t\ \ \ \ \ \ \ \ \ \ \ \ \ flags;\ \ \ \ \ /*\ operation\ flags\ */
};
\f[]
.fi
.IP "mask"
The mask field is used for forward and backward API compatibility. It is
used by the application to indicate which fields in the attribute structure
have been set. For this version of the API, mask should be set to
FI_WAIT_ATTR_MASK_V1, indicating that all specified fields have been initialized.
.IP "wait_obj"
Wait sets are associated with specific wait object(s). Wait objects allow
applications to block until the wait object is signaled, indicating that
an event is available to be read. Users may use fi_control to retrieve
the underlying wait object(s) associated with a wait set, in order to use it in
other system calls. The following values may be used to specify the type
of wait object associated with a wait set: FI_WAIT_UNSPEC,
FI_WAIT_FD, and FI_WAIT_MUT_COND.
.RS
.IP "FI_WAIT_UNSPEC"
Specifies that the user will only wait on the wait set using fabric interface
calls, such as fi_wait. In this case, the underlying provider may
select the most appropriate or highest performing wait object available,
including custom wait mechanisms. Applications that select
FI_WAIT_UNSPEC are not guaranteed to retrieve the underlying wait
object.
.IP "FI_WAIT_FD"
Indicates that the wait set should use a file descriptor as its wait mechanism.
.PP
\f[I]mask\f[] : The mask field is used for forward and backward API
compatibility.
It is used by the application to indicate which fields in the attribute
structure have been set.
For this version of the API, mask should be set to FI_WAIT_ATTR_MASK_V1,
indicating that all specified fields have been initialized.
.PP
\f[I]wait_obj\f[] : Wait sets are associated with specific wait
object(s).
Wait objects allow applications to block until the wait object is
signaled, indicating that an event is available to be read.
Users may use fi_control to retrieve the underlying wait object(s)
associated with a wait set, in order to use it in other system calls.
The following values may be used to specify the type of wait object
associated with a wait set: FI_WAIT_UNSPEC, FI_WAIT_FD, and
FI_WAIT_MUT_COND.
.IP \[bu] 2
\f[I]FI_WAIT_UNSPEC\f[] : Specifies that the user will only wait on the
wait set using fabric interface calls, such as fi_wait.
In this case, the underlying provider may select the most appropriate or
highest performing wait object available, including custom wait
mechanisms.
Applications that select FI_WAIT_UNSPEC are not guaranteed to retrieve
the underlying wait object.
.IP \[bu] 2
\f[I]FI_WAIT_FD\f[] : Indicates that the wait set should use a file
descriptor as its wait mechanism.
A file descriptor wait object must be usable in select, poll, and epoll
routines. However, a provider may signal an FD wait object by marking it
as readable, writable, or with an error.
.IP "FI_WAIT_MUT_COND"
Specifies that the wait set should use a pthread mutex and cond variable as a
wait object.
.RE
.IP "flags"
Flags that set the default operation of the wait set. The use of this field
is reserved and must be set to 0 by the caller.
.SS "fi_close"
routines.
However, a provider may signal an FD wait object by marking it as
readable, writable, or with an error.
.IP \[bu] 2
\f[I]FI_WAIT_MUT_COND\f[] : Specifies that the wait set should use a
pthread mutex and cond variable as a wait object.
.PP
\f[I]flags\f[] : Flags that set the default operation of the wait set.
The use of this field is reserved and must be set to 0 by the caller.
.SS fi_close
.PP
The fi_close call releases all resources associated with a wait set.
The wait set must not be bound to any other opened resources prior to
being closed.
.SS "fi_control"
The fi_control call is used to access provider or implementation specific
details of the wait set. Access to the wait set should be serialized
across all calls when fi_control is invoked, as it may redirect the
implementation of wait set operations. The following control commands are usable
with a wait set.
.IP "FI_GETWAIT (void **)"
This command allows the user to retrieve the low-level wait object(s)
associated with the wait set. The format of the wait-object is specified during
wait set creation, through the wait set attributes. The fi_control arg
parameter should be an address to a struct fi_wait_obj_set.
.SS fi_control
.PP
The fi_control call is used to access provider or implementation
specific details of the wait set.
Access to the wait set should be serialized across all calls when
fi_control is invoked, as it may redirect the implementation of wait set
operations.
The following control commands are usable with a wait set.
.PP
\f[I]FI_GETWAIT (void **)\f[] : This command allows the user to retrieve
the low-level wait object(s) associated with the wait set.
The format of the wait-object is specified during wait set creation,
through the wait set attributes.
The fi_control arg parameter should be an address to a struct
fi_wait_obj_set.
.IP
.nf
struct fi_wait_obj_set {
size_t len; /* size of obj array entries */
enum fi_wait_obj wait_obj; /* type of wait obj */
void *obj; /* array of wait objects */
\f[C]
struct\ fi_wait_obj_set\ {
\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ len;\ \ \ \ \ \ /*\ size\ of\ obj\ array\ entries\ */
\ \ \ \ enum\ fi_wait_obj\ \ wait_obj;\ /*\ type\ of\ wait\ obj\ */
\ \ \ \ void\ \ \ \ \ \ \ \ \ \ \ \ \ *obj;\ \ \ \ \ \ /*\ array\ of\ wait\ objects\ */
};
\f[]
.fi
.PP
On input, len should indicate the size in bytes referenced by the obj
field. On output, the needed size will be returned. The underlying wait
objects will be returned in the obj array. If insufficient space is provided,
the results will be truncated. The wait_obj field may be used to identify
the format of the wait objects.
.SS "fi_wait"
field.
On output, the needed size will be returned.
The underlying wait objects will be returned in the obj array.
If insufficient space is provided, the results will be truncated.
The wait_obj field may be used to identify the format of the wait
objects.
.SS fi_wait
.PP
Waits on a wait set until one or more of its underlying wait objects is
signaled.
.SH "RETURN VALUES"
Returns 0 on success. On error, a negative value corresponding to
fabric errno is returned.
.SH RETURN VALUES
.PP
Fabric errno values are defined in
.IR "rdma/fi_errno.h".
Returns 0 on success.
On error, a negative value corresponding to fabric errno is returned.
.PP
fi_poll
.RS
On success, if events are available, returns the number of entries written
to the context array.
.SH "NOTES"
.SH "SEE ALSO"
fi_getinfo(3), fi_domain(3), fi_cntr(3), fi_eq(3)
Fabric errno values are defined in \f[C]rdma/fi_errno.h\f[].
.PP
fi_poll : On success, if events are available, returns the number of
entries written to the context array.
.SH NOTES
.SH SEE ALSO
.PP
\f[C]fi_getinfo\f[](3), \f[C]fi_domain\f[](3), \f[C]fi_cntr\f[](3),
\f[C]fi_eq\f[](3)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -1 +0,0 @@
.so man3/fi_msg.3

Просмотреть файл

@ -1,230 +1,253 @@
.TH "FI_RMA" 3 "2014-11-11" "Libfabric v0.0.2" "Libfabric Programmer's Manual" libfabric
.TH fi_rma 3 "2014\-12\-02" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_rma - Remote memory access operations
.PP
fi_read / fi_readv
.br
fi_readfrom / fi_readmsg
.RS
.TP
.B fi_read / fi_readv / fi_readmsg
Initiates a read from remote memory
.RE
.PP
fi_write / fi_writev
.br
fi_writeto / fi_writemsg
.br
fi_inject_write / fi_inject_writeto
.br
fi_writedata / fi_writedatato
.RS
Initiate a write to remote memory
.RE
.PP
fi_write / fi_writev / fi_writemsg
.PD 0
.P
.PD
fi_inject_write / fi_writedata : Initiate a write to remote memory
.SH SYNOPSIS
.B #include <rdma/fi_rma.h>
.HP
.BI "ssize_t fi_read(struct fid_ep *" ep ", void * " buf ", size_t " len ", void *" desc ","
.BI "uint64_t " addr ", uint64_t " key ", void * " context ");"
.HP
.BI "ssize_t fi_readv(struct fid_ep *" ep ", const struct iovec * " iov ", void **" desc ","
.BI "size_t " count ", uint64_t " addr ", uint64_t " key ", void * " context ");"
.HP
.BI "ssize_t fi_readfrom(struct fid_ep *" ep ", void * " buf ", size_t " len ", void *" desc ","
.BI "fi_addr_t " src_addr ", uint64_t " addr ", uint64_t " key ","
.BI "void * " context ");"
.HP
.BI "ssize_t fi_readmsg(struct fid_ep *" ep ", const struct fi_msg_rma * " msg ", uint64_t " flags ");"
.PP
.HP
.BI "ssize_t fi_write(struct fid_ep *" ep ", const void * " buf ", size_t " len ", void *" desc ","
.BI "uint64_t " addr ", uint64_t " key ", void * " context ");"
.HP
.BI "ssize_t fi_writev(struct fid_ep *" ep ", const struct iovec * " iov ", void **" desc ","
.BI "size_t " count ", uint64_t " addr ", uint64_t " key ", void * " context ");"
.HP
.BI "ssize_t fi_writeto(struct fid_ep *" ep ", const void * " buf ", size_t " len ", void *" desc ","
.BI "fi_addr_t " dest_addr ", uint64_t " addr ", uint64_t " key ","
.BI "void * " context ");"
.HP
.BI "ssize_t fi_writemsg(struct fid_ep *" ep ", const struct fi_msg_rma * " msg ", uint64_t " flags ");"
.HP
.BI "ssize_t fi_inject_write(struct fid_ep *" ep ", const void * " buf ", size_t " len ","
.BI "uint64_t " addr ", uint64_t " key ");"
.HP
.BI "ssize_t fi_inject_writeto(struct fid_ep *" ep ", const void * " buf ", size_t " len ","
.BI "fi_addr_t " dest_addr ", uint64_t " addr ", uint64_t " key ");"
.HP
.BI "ssize_t fi_writedata(struct fid_ep *" ep ", const void * " buf ", size_t " len ", void *" desc ","
.BI "uint64_t " data ", uint64_t " addr ", uint64_t " key ", void * " context ");"
.HP
.BI "ssize_t fi_writedatato(struct fid_ep *" ep ", const void * " buf ", size_t " len ", void *" desc ","
.BI "uint64_t " data ", fi_addr_t " dest_addr ", uint64_t " addr ", uint64_t " key ","
.BI "void * " context ");"
.IP
.nf
\f[C]
#include\ <rdma/fi_rma.h>
ssize_t\ fi_read(struct\ fid_ep\ *ep,\ void\ *buf,\ size_t\ len,\ void\ *desc,
\ \ \ \ fi_addr_t\ src_addr,\ uint64_t\ addr,\ uint64_t\ key,\ void\ *context);
ssize_t\ fi_readv(struct\ fid_ep\ *ep,\ const\ struct\ iovec\ *iov,\ void\ **desc,
\ \ \ \ size_t\ count,\ fi_addr_t\ src_addr,\ uint64_t\ addr,\ uint64_t\ key,
\ \ \ \ void\ *context);
ssize_t\ fi_readmsg(struct\ fid_ep\ *ep,\ const\ struct\ fi_msg_rma\ *msg,
\ \ \ \ uint64_t\ flags);
ssize_t\ fi_write(struct\ fid_ep\ *ep,\ const\ void\ *buf,\ size_t\ len,
\ \ \ \ void\ *desc,\ fi_addr_t\ dest_addr,\ uint64_t\ addr,\ uint64_t\ key,
\ \ \ \ void\ *context);
ssize_t\ fi_writev(struct\ fid_ep\ *ep,\ const\ struct\ iovec\ *iov,\ void\ **desc,
\ \ \ \ size_t\ count,\ fi_addr_t\ dest_addr,\ uint64_t\ addr,\ uint64_t\ key,
\ \ \ \ void\ *context);
ssize_t\ fi_writemsg(struct\ fid_ep\ *ep,\ const\ struct\ fi_msg_rma\ *msg,
\ \ \ \ uint64_t\ flags);
ssize_t\ fi_inject_write(struct\ fid_ep\ *ep,\ const\ void\ *buf,\ size_t\ len,
\ \ \ \ fi_addr_t\ dest_addr,\ uint64_t\ addr,\ uint64_t\ key);
ssize_t\ fi_writedata(struct\ fid_ep\ *ep,\ const\ void\ *buf,\ size_t\ len,
\ \ \ \ void\ *desc,\ uint64_t\ data,\ fi_addr_t\ dest_addr,\ uint64_t\ addr,
\ \ \ \ uint64_t\ key,\ void\ *context);
\f[]
.fi
.SH ARGUMENTS
.IP "ep"
Fabric endpoint on which to initiate read or write operation.
.IP "buf"
Local data buffer to read into (read target) or write from (write source)
.IP "len"
Length of data to read or write, specified in bytes. Valid transfers
are from 0 bytes up to the endpoint's max_msg_size.
.IP "iov"
Vectored data buffer.
.IP "count"
Count of vectored data entries.
.IP "addr"
Address of remote memory to access.
.IP "key"
Protection key associated with the remote memory.
.IP "desc"
Descriptor associated with the local data buffer
.IP "data"
Remote CQ data to transfer with the operation.
.IP "dest_addr"
Destination address for connectionless write transfers
.IP "src_addr"
Source address to read from for connectionless transfers
.IP "msg"
Message descriptor for read and write operations.
.IP "flags"
Additional flags to apply for the read or write operation.
.IP "context"
User specified pointer to associate with the operation.
.SH "DESCRIPTION"
.PP
\f[I]ep\f[] : Fabric endpoint on which to initiate read or write
operation.
.PP
\f[I]buf\f[] : Local data buffer to read into (read target) or write
from (write source)
.PP
\f[I]len\f[] : Length of data to read or write, specified in bytes.
Valid transfers are from 0 bytes up to the endpoint\[aq]s max_msg_size.
.PP
\f[I]iov\f[] : Vectored data buffer.
.PP
\f[I]count\f[] : Count of vectored data entries.
.PP
\f[I]addr\f[] : Address of remote memory to access.
.PP
\f[I]key\f[] : Protection key associated with the remote memory.
.PP
\f[I]desc\f[] : Descriptor associated with the local data buffer
.PP
\f[I]data\f[] : Remote CQ data to transfer with the operation.
.PP
\f[I]dest_addr\f[] : Destination address for connectionless write
transfers.
Ignored for connected endpoints.
.PP
\f[I]src_addr\f[] : Source address to read from for connectionless
transfers.
Ignored for connected endpoints.
.PP
\f[I]msg\f[] : Message descriptor for read and write operations.
.PP
\f[I]flags\f[] : Additional flags to apply for the read or write
operation.
.PP
\f[I]context\f[] : User specified pointer to associate with the
operation.
.SH DESCRIPTION
.PP
RMA (remote memory access) operations are used to transfer data directly
between a local data buffer and a remote data buffer. RMA transfers occur
on a byte level granularity, and no message boundaries are maintained.
between a local data buffer and a remote data buffer.
RMA transfers occur on a byte level granularity, and no message
boundaries are maintained.
.PP
The write functions -- fi_write, fi_writev, fi_writeto,
fi_writemsg, fi_inject_write, fi_inject_writeto, fi_writedata, and fi_writedatato --
are used to transmit data into a remote
memory buffer. The main difference between write functions
are the number and type of parameters that they accept as input. Otherwise,
they perform the same general function.
The write functions -- fi_write, fi_writev, fi_writemsg,
fi_inject_write, and fi_writedata -- are used to transmit data into a
remote memory buffer.
The main difference between write functions are the number and type of
parameters that they accept as input.
Otherwise, they perform the same general function.
.PP
The read functions -- fi_read, fi_readv, fi_readfrom,
fi_readmsg -- are used to transfer data from a remote
memory region into local data buffer(s). Similar to the write operations,
read operations operate asynchronously. Users should not touch the posted
data buffer(s) until the read operation has completed.
The read functions -- fi_read, fi_readv, and fi_readmsg -- are used to
transfer data from a remote memory region into local data buffer(s).
Similar to the write operations, read operations operate asynchronously.
Users should not touch the posted data buffer(s) until the read
operation has completed.
.PP
Completed RMA operations are reported to the user through one or more event
collectors associated with the endpoint. Users provide context which are
associated with each operation, and is returned to the user
as part of the event completion. See fi_eq for completion event details.
Completed RMA operations are reported to the user through one or more
completion queues associated with the endpoint.
Users provide context which are associated with each operation, and is
returned to the user as part of the completion.
See fi_cq for completion event details.
.PP
By default, the remote endpoint does not generate an event or notify the
user when a memory region has been accessed by an RMA read or write operation.
user when a memory region has been accessed by an RMA read or write
operation.
However, immediate data may be associated with an RMA write operation.
RMA writes with immediate data will generate a completion entry at the
remote endpoint, so that the immediate data may be delivered.
.SS "fi_write"
The call fi_write transfers the data contained in the user-specified data
buffer to a remote memory region. The local endpoint must be connected to
a remote endpoint or destination before fi_write is called. Unless the
endpoint has been configured differently, the data buffer passed into
fi_write must not be touched by the application
until the fi_write call completes asynchronously.
.SS "fi_writev"
The fi_writev call adds support for a scatter-gather list to fi_write and/or
fi_writemem. The fi_writev transfers the set of data buffers referenced by
the iov parameter to the remote memory region. The format of
iov parameter is specified by the user when the endpoint is created. See
fi_getinfo for more details on iov formats.
.SS "fi_writeto"
The fi_writeto function is equivalent to fi_write for unconnected endpoints.
.SS "fi_writemsg"
The fi_writemsg call supports data transfers over both connected and unconnected
endpoints, with the ability to control the write operation per call through the
use of flags. The fi_writemsg function takes a struct fi_msg_rma as input.
.SS fi_write
.PP
The call fi_write transfers the data contained in the user-specified
data buffer to a remote memory region.
The local endpoint must be connected to a remote endpoint or destination
before fi_write is called.
Unless the endpoint has been configured differently, the data buffer
passed into fi_write must not be touched by the application until the
fi_write call completes asynchronously.
.SS fi_writev
.PP
The fi_writev call adds support for a scatter-gather list to fi_write.
The fi_writev transfers the set of data buffers referenced by the iov
parameter to the remote memory region.
.SS fi_writemsg
.PP
The fi_writemsg call supports data transfers over both connected and
unconnected endpoints, with the ability to control the write operation
per call through the use of flags.
The fi_writemsg function takes a struct fi_msg_rma as input.
.IP
.nf
struct fi_msg_rma {
const struct iovec *msg_iov; /* local scatter-gather array */
void **desc; /* operation descriptor */
size_t iov_count; /* # elements in msg_iov */
const void *addr; /* optional endpoint address */
const struct fi_rma_iov rma_iov; /* remote SGL */
size_t rma_iov_count;/* # elements in rma_iov */
void *context; /* user-defined context */
uint64_t data; /* optional immediate data */
\f[C]
struct\ fi_msg_rma\ {
\ \ \ \ const\ struct\ iovec\ *msg_iov;\ \ \ \ \ /*\ local\ scatter-gather\ array\ */
\ \ \ \ void\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ **desc;\ \ \ \ \ \ \ /*\ operation\ descriptor\ */
\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ \ iov_count;\ \ \ \ /*\ #\ elements\ in\ msg_iov\ */
\ \ \ \ const\ void\ \ \ \ \ \ \ \ \ *addr;\ \ \ \ \ \ \ \ /*\ optional\ endpoint\ address\ */
\ \ \ \ const\ struct\ fi_rma_iov\ rma_iov;\ /*\ remote\ SGL\ */
\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ \ rma_iov_count;/*\ #\ elements\ in\ rma_iov\ */
\ \ \ \ void\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ *context;\ \ \ \ \ /*\ user-defined\ context\ */
\ \ \ \ uint64_t\ \ \ \ \ \ \ \ \ \ \ data;\ \ \ \ \ \ \ \ \ /*\ optional\ immediate\ data\ */
};
struct fi_rma_iov {
uint64_t addr; /* target RMA address */
size_t len; /* size of target buffer */
uint64_t key; /* access key */
struct\ fi_rma_iov\ {
\ \ \ \ uint64_t\ \ \ \ \ \ \ \ \ \ \ addr;\ \ \ \ \ \ \ \ \ /*\ target\ RMA\ address\ */
\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ \ len;\ \ \ \ \ \ \ \ \ \ /*\ size\ of\ target\ buffer\ */
\ \ \ \ uint64_t\ \ \ \ \ \ \ \ \ \ \ key;\ \ \ \ \ \ \ \ \ \ /*\ access\ key\ */
};
\f[]
.fi
.SS "fi_inject_write"
The write inject call is an optimized version of fi_write. The fi_inject_write
function behaves as if the FI_INJECT transfer flag were set, and
FI_EVENT were not. That is, the data buffer is available for reuse
immediately on returning from fi_inject_write, and no completion event will
be generated for this write. The completion event will be suppressed even if
the endpoint has not been configured with FI_EVENT. See the flags
discussion below for more details.
.SS "fi_inject_writeto"
This call is similar to fi_inject_write, but for unconnected endpoints.
.SS "fi_writedata"
The write data call is similar to fi_write, but allows for the sending of
remote CQ data (see FI_REMOTE_CQ_DATA flag) as part of the transfer.
.SS "fi_writedatato"
This call is similar to fi_writedata, but for unconnected endpoints.
.SS "fi_read"
The fi_read call requests that the remote endpoint transfer data from the
remote memory region into the local data buffer. The local endpoint must
be connected to a remote endpoint or destination before fi_read is called.
.SS "fi_readfrom"
The fi_readfrom call is equivalent to fi_read for unconnected endpoints.
.SS "fi_readmsg"
The fi_readmsg call supports data transfers over both connected and unconnected
endpoints, with the ability to control the read operation per call through the
use of flags. The fi_readmsg function takes a struct fi_msg_rma as input.
.SH "FLAGS"
.SS fi_inject_write
.PP
The write inject call is an optimized version of fi_write.
The fi_inject_write function behaves as if the FI_INJECT transfer flag
were set, and FI_COMPLETION were not.
That is, the data buffer is available for reuse immediately on returning
from fi_inject_write, and no completion event will be generated for
this write.
The completion event will be suppressed even if the endpoint has not
been configured with FI_COMPLETION.
See the flags discussion below for more details.
.SS fi_writedata
.PP
The write data call is similar to fi_write, but allows for the sending
of remote CQ data (see FI_REMOTE_CQ_DATA flag) as part of the transfer.
.SS fi_read
.PP
The fi_read call requests that the remote endpoint transfer data from
the remote memory region into the local data buffer.
The local endpoint must be connected to a remote endpoint or destination
before fi_read is called.
.SS fi_readv
.PP
The fi_readv call adds support for a scatter-gather list to fi_read.
The fi_readv transfers data from the remote memory region into the set
of data buffers referenced by the iov parameter.
.SS fi_readmsg
.PP
The fi_readmsg call supports data transfers over both connected and
unconnected endpoints, with the ability to control the read operation
per call through the use of flags.
The fi_readmsg function takes a struct fi_msg_rma as input.
.SH FLAGS
.PP
The fi_readmsg and fi_writemsg calls allow the user to specify flags
which can change the default data transfer operation.
Flags specified with fi_readmsg / fi_writemsg override most flags
previously configured with the endpoint, except where noted (see fi_endpoint).
The following list of flags are usable with fi_readmsg and/or fi_writemsg.
.IP "FI_REMOTE_CQ_DATA"
Applies to fi_writemsg, fi_writedata, and fi_writedatato. Indicates that
remote CQ data is available and should
be sent as part of the request. See fi_getinfo
for additional details on FI_REMOTE_CQ_DATA.
.IP "FI_EVENT"
Indicates that a completion entry should be generated for the specified
operation. The endpoint must be bound to an event queue
with FI_EVENT that corresponds to the specified operation, or this flag
is ignored.
.IP "FI_MORE"
Indicates that the user has additional requests that will immediately be
posted after the current call returns. Use of this flag may improve
performance by enabling the provider to optimize its access to the fabric
hardware.
.IP "FI_REMOTE_SIGNAL"
Indicates that a completion event at the target process should be generated
for the given operation. The remote endpoint must be configured with
FI_REMOTE_SIGNAL, or this flag will be ignored by the target.
.IP "FI_INJECT"
Applies to fi_writemsg. Indicates that the outbound data buffer should be
returned to user immediately after the write call returns, even if the operation
is handled asynchronously. This may require that the underlying provider
implementation copy the data into a local buffer and transfer out of that
buffer.
.IP "FI_REMOTE_COMPLETE"
Applies to fi_writemsg. Indicates that a completion should not be generated
until the operation has completed on the remote side.
.SH "RETURN VALUE"
Returns 0 on success. On error, a negative value corresponding to fabric
errno is returned. Fabric errno values are defined in
.IR "rdma/fi_errno.h".
.SH "ERRORS"
.IP "-FI_EAGAIN"
Indicates that the underlying provider currently lacks the resources needed
to initiate the requested operation. This may be the result of insufficient
internal buffering, in the case of FI_SEND_BUFFERED, or processing queues
are full. The operation may be retried after additional provider resources
become available, usually through the completion of currently outstanding
previously configured with the endpoint, except where noted (see
fi_endpoint).
The following list of flags are usable with fi_readmsg and/or
fi_writemsg.
.PP
\f[I]FI_REMOTE_CQ_DATA\f[] : Applies to fi_writemsg and fi_writedata.
Indicates that remote CQ data is available and should be sent as part of
the request.
See fi_getinfo for additional details on FI_REMOTE_CQ_DATA.
.PP
\f[I]FI_COMPLETION\f[] : Indicates that a completion entry should be
generated for the specified operation.
The endpoint must be bound to an event queue with FI_COMPLETION that
corresponds to the specified operation, or this flag is ignored.
.PP
\f[I]FI_MORE\f[] : Indicates that the user has additional requests that
will immediately be posted after the current call returns.
Use of this flag may improve performance by enabling the provider to
optimize its access to the fabric hardware.
.PP
\f[I]FI_REMOTE_SIGNAL\f[] : Indicates that a completion event at the
target process should be generated for the given operation.
The remote endpoint must be configured with FI_REMOTE_SIGNAL, or this
flag will be ignored by the target.
.PP
\f[I]FI_INJECT\f[] : Applies to fi_writemsg.
Indicates that the outbound data buffer should be returned to user
immediately after the write call returns, even if the operation is
handled asynchronously.
This may require that the underlying provider implementation copy the
data into a local buffer and transfer out of that buffer.
.PP
\f[I]FI_REMOTE_COMPLETE\f[] : Applies to fi_writemsg.
Indicates that a completion should not be generated until the operation
has completed on the remote side.
.SH RETURN VALUE
.PP
Returns 0 on success.
On error, a negative value corresponding to fabric errno is returned.
Fabric errno values are defined in \f[C]rdma/fi_errno.h\f[].
.SH ERRORS
.PP
\f[I]-FI_EAGAIN\f[] : Indicates that the underlying provider currently
lacks the resources needed to initiate the requested operation.
This may be the result of insufficient internal buffering, in the case
of FI_INJECT, or processing queues are full.
The operation may be retried after additional provider resources become
available, usually through the completion of currently outstanding
operations.
.SH "SEE ALSO"
fi_getinfo(3), fi_endpoint(3), fi_domain(3), fi_eq(3)
.SH SEE ALSO
.PP
\f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3),
\f[C]fi_eq\f[](3)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -1 +0,0 @@
.so man3/fi_msg.3

Просмотреть файл

@ -1 +0,0 @@
.so man3/fi_msg.3

Просмотреть файл

@ -0,0 +1 @@
.so man3/fi_errno.3

Просмотреть файл

@ -1,301 +1,335 @@
.TH "FI_TAGGED" 3 "2014-11-11" "Libfabric v0.0.2" "Libfabric Programmer's Manual" libfabric
.TH fi_tagged 3 "2014\-12\-02" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
fi_tagged \- Tagged data transfer operations
.PP
fi_trecv / fi_trecvv
.br
fi_trecvfrom / fi_trecvmsg
.RS
fi_tagged - Tagged data transfer operations
.TP
.B fi_trecv / fi_trecvv / fi_trecvmsg
Post a buffer to receive an incoming message
.RS
.RE
.PP
fi_tsend / fi_tsendv
.br
fi_tsendto / fi_tsendmsg
.br
fi_tinject / fi_tinjectto
.br
fi_tsenddata / fi_tsenddatato
.RS
Initiate an operation to send a message
.RE
.PP
fi_tsearch
.RS
fi_tsend / fi_tsendv / fi_tsendmsg
.PD 0
.P
.PD
fi_tinject / fi_tsenddata : Initiate an operation to send a message
.TP
.B fi_tsearch
Initiate a search operation for a buffered receive matching a given tag
.RS
.RE
.SH SYNOPSIS
.B "#include <rdma/fi_tagged.h>"
.HP
.BI "ssize_t fi_trecv(struct fid_ep *" ep ", void * " buf ", size_t " len ", void *" desc ","
.BI "uint64_t " tag ", uint64_t " ignore ", void * " context ");"
.HP
.BI "ssize_t fi_trecvv(struct fid_ep *" ep ", const struct iovec * " iov ", void *" desc ","
.BI "size_t " count ","
.BI "uint64_t " tag ", uint64_t " ignore ", void * " context ");"
.HP
.BI "ssize_t fi_trecvfrom(struct fid_ep *" ep ", const void * " buf ", size_t " len ","
.BI "void *" desc ","
.BI "fi_addr_t " src_addr ", uint64_t " tag ", uint64_t " ignore ","
.BI "void * " context ");"
.HP
.BI "ssize_t fi_trecvmsg(struct fid_ep *" ep ", const struct fi_msg_tagged * " msg ","
.BI "uint64_t " flags ");"
.PP
.HP
.BI "ssize_t fi_tsend(struct fid_ep *" ep ", const void * " buf ", size_t " len ","
.BI "void *" desc ","
.BI "uint64_t " tag ", void * " context ");"
.HP
.BI "ssize_t fi_tsendv(struct fid_ep *" ep ", const struct iovec * " iov ","
.BI "void *" desc ", size_t " count ","
.BI "uint64_t " tag ", void * " context ");"
.HP
.BI "ssize_t fi_tsendto(struct fid_ep *" ep ", const void * " buf ", size_t " len ","
.BI "void *" desc ","
.BI "fi_addr_t " dest_addr ", uint64_t " tag ", void * " context ");"
.HP
.BI "ssize_t fi_tsendmsg(struct fid_ep *" ep ", const struct fi_msg_tagged * " msg ","
.BI "uint64_t " flags ");"
.HP
.BI "ssize_t fi_tinject(struct fid_ep *" ep ", const void * " buf ", size_t " len ","
.BI "uint64_t " tag ");"
.HP
.BI "ssize_t fi_tinjectto(struct fid_ep *" ep ", const void * " buf ", size_t " len ","
.BI "fi_addr_t " dest_addr ", uint64_t " tag ");"
.HP
.BI "ssize_t fi_tsenddata(struct fid_ep *" ep ", const void * " buf ", size_t " len ","
.BI "void *" desc ", uint64_t " data ","
.BI "uint64_t " tag ", void * " context ");"
.HP
.BI "ssize_t fi_tsenddatato(struct fid_ep *" ep ", const void * " buf ", size_t " len ","
.BI "void *" desc ", uint64_t " data ","
.BI "fi_addr_t " dest_addr ", uint64_t " tag ", void * " context ");"
.HP
.BI "ssize_t fi_tsearch(struct fid_ep *" ep ", uint64_t *" tag ", uint64_t " ignore ","
.BI "uint64_t " flags ", void * " src_addr ", size_t * " src_addrlen ","
.BI "size_t * " len ", void * " context ");"
.IP
.nf
\f[C]
#include\ <rdma/fi_tagged.h>
ssize_t\ fi_trecv(struct\ fid_ep\ *ep,\ void\ *buf,\ size_t\ len,\ void\ *desc,
\ \ \ \ fi_addr_t\ src_addr,\ uint64_t\ tag,\ uint64_t\ ignore,\ void\ *context);
ssize_t\ fi_trecvv(struct\ fid_ep\ *ep,\ const\ struct\ iovec\ *iov,\ void\ *desc,
\ \ \ \ size_t\ count,\ fi_addr_t\ src_addr,\ uint64_t\ tag,\ uint64_t\ ignore,
\ \ \ \ void\ *context);
ssize_t\ fi_trecvmsg(struct\ fid_ep\ *ep,\ const\ struct\ fi_msg_tagged\ *msg,
\ \ \ \ uint64_t\ flags);
ssize_t\ fi_tsend(struct\ fid_ep\ *ep,\ const\ void\ *buf,\ size_t\ len,
\ \ \ \ void\ *desc,\ fi_addr_t\ dest_addr,\ uint64_t\ tag,\ void\ *context);
ssize_t\ fi_tsendv(struct\ fid_ep\ *ep,\ const\ struct\ iovec\ *iov,
\ \ \ \ void\ *desc,\ size_t\ count,\ fi_addr_t\ dest_addr,\ uint64_t\ tag,
\ \ \ \ void\ *context);
ssize_t\ fi_tsendmsg(struct\ fid_ep\ *ep,\ const\ struct\ fi_msg_tagged\ *msg,
\ \ \ \ uint64_t\ flags);
ssize_t\ fi_tinject(struct\ fid_ep\ *ep,\ const\ void\ *buf,\ size_t\ len,
\ \ \ \ fi_addr_t\ dest_addr,\ uint64_t\ tag);
ssize_t\ fi_tsenddata(struct\ fid_ep\ *ep,\ const\ void\ *buf,\ size_t\ len,
\ \ \ \ void\ *desc,\ uint64_t\ data,\ fi_addr_t\ dest_addr,\ uint64_t\ tag,
\ \ \ \ void\ *context);
ssize_t\ fi_tsearch(struct\ fid_ep\ *ep,\ uint64_t\ *tag,\ uint64_t\ ignore,
\ \ \ \ uint64_t\ flags,\ void\ *src_addr,\ size_t\ *src_addrlen,
\ \ \ \ size_t\ *len,\ void\ *context);
\f[]
.fi
.SH ARGUMENTS
.IP "fid"
Fabric endpoint on which to initiate tagged communication operation.
.IP "buf"
Data buffer to send or receive.
.IP "len"
Length of data buffer to send or receive.
.IP "iov"
Vectored data buffer.
.IP "count"
Count of vectored data entries.
.IP "tag"
Tag associated with the message.
.IP "ignore"
Mask of bits to ignore applied to the tag for receive operations.
.IP "desc"
Memory descriptor associated with the data buffer
.IP "data"
Remote CQ data to transfer with the sent data.
.IP "dest_addr"
Destination address for connectionless transfers
.IP "src_addr"
Source address to receive from for connectionless transfers
.IP "msg"
Message descriptor for send and receive operations.
.IP "flags"
Additional flags to apply for the send or receive operation.
.IP "context"
User specified pointer to associate with the operation.
.SH "DESCRIPTION"
Tagged messages are data transfers which carry a key or tag with
the message buffer. The tag is used at the receiving endpoint to
match the incoming message with a corresponding receive buffer.
Message tags match when the receive buffer tag is the same as
the send buffer tag with the ignored bits masked out. This
can be stated as:
.PP
send_tag & ~ignore == recv_tag & ~ignore
\f[I]fid\f[] : Fabric endpoint on which to initiate tagged communication
operation.
.PP
In general, message tags are checked against receive buffers in the order
in which messages have been posted to the endpoint. See the ordering
discussion below for more details.
\f[I]buf\f[] : Data buffer to send or receive.
.PP
The send functions -- fi_tsend, fi_tsendv, fi_tsendto, fi_tsendmsg,
fi_tinject, fi_tinjectto, fi_tsenddata, and fi_tsenddatato --
are used to transmit a tagged message
from one endpoint to another endpoint. The main difference between
send functions are the number and type of parameters that they accept as input.
\f[I]len\f[] : Length of data buffer to send or receive.
.PP
\f[I]iov\f[] : Vectored data buffer.
.PP
\f[I]count\f[] : Count of vectored data entries.
.PP
\f[I]tag\f[] : Tag associated with the message.
.PP
\f[I]ignore\f[] : Mask of bits to ignore applied to the tag for receive
operations.
.PP
\f[I]desc\f[] : Memory descriptor associated with the data buffer
.PP
\f[I]data\f[] : Remote CQ data to transfer with the sent data.
.PP
\f[I]dest_addr\f[] : Destination address for connectionless transfers.
Ignored for connected endpoints.
.PP
\f[I]src_addr\f[] : Source address to receive from for connectionless
transfers.
Ignored for connected endpoints.
.PP
\f[I]msg\f[] : Message descriptor for send and receive operations.
.PP
\f[I]flags\f[] : Additional flags to apply for the send or receive
operation.
.PP
\f[I]context\f[] : User specified pointer to associate with the
operation.
.SH DESCRIPTION
.PP
Tagged messages are data transfers which carry a key or tag with the
message buffer.
The tag is used at the receiving endpoint to match the incoming message
with a corresponding receive buffer.
Message tags match when the receive buffer tag is the same as the send
buffer tag with the ignored bits masked out.
This can be stated as:
.IP
.nf
\f[C]
send_tag\ &\ ~ignore\ ==\ recv_tag\ &\ ~ignore
\f[]
.fi
.PP
In general, message tags are checked against receive buffers in the
order in which messages have been posted to the endpoint.
See the ordering discussion below for more details.
.PP
The send functions -- fi_tsend, fi_tsendv, fi_tsendmsg, fi_tinject, and
fi_tsenddata -- are used to transmit a tagged message from one endpoint
to another endpoint.
The main difference between send functions are the number and type of
parameters that they accept as input.
Otherwise, they perform the same general function.
.PP
The receive functions -- fi_trecv, fi_trecvv, fi_trecvfrom,
fi_trecvmsg -- post a data buffer to an endpoint to receive
inbound tagged messages. Similar to the send operations, receive operations
operate asynchronously. Users should not touch the posted data buffer(s)
until the receive operation has completed. Posted receive buffers are
matched with inbound send messages based on the tags associated with the
send and receive buffers.
The receive functions -- fi_trecv, fi_trecvv, fi_trecvmsg -- post a data
buffer to an endpoint to receive inbound tagged messages.
Similar to the send operations, receive operations operate
asynchronously.
Users should not touch the posted data buffer(s) until the receive
operation has completed.
Posted receive buffers are matched with inbound send messages based on
the tags associated with the send and receive buffers.
.PP
Completed message operations are reported to the user through one or more event
collectors associated with the endpoint. Users provide context which are
associated with each operation, and is returned to the user
as part of the event completion. See fi_eq for completion event details.
.SS "fi_tsend"
The call fi_tsend transfers the data contained in the user-specified data
buffer to a remote endpoint, with message boundaries being maintained.
Completed message operations are reported to the user through one or
more event collectors associated with the endpoint.
Users provide context which are associated with each operation, and is
returned to the user as part of the event completion.
See fi_eq for completion event details.
.SS fi_tsend
.PP
The call fi_tsend transfers the data contained in the user-specified
data buffer to a remote endpoint, with message boundaries being
maintained.
The local endpoint must be connected to a remote endpoint or destination
before fi_tsend is called. Unless the endpoint has been configured differently,
the data buffer passed into fi_tsend must not be touched by the application
until the fi_tsend call completes asynchronously.
.SS "fi_tsendv"
The fi_tsendv call adds support for a scatter-gather list to fi_tsend and/or
fi_tsendmem. The fi_tsendv transfers the set of data buffers referenced by
the iov parameter to a remote endpoint as a single message. The format of
iov parameter is specified by the user when the endpoint is created. See
fi_getinfo for more details on iov formats.
.SS "fi_tsendto"
The fi_tsendto function is equivalent to fi_tsend for unconnected endpoints.
.SS "fi_tsendmsg"
The fi_tsendmsg call supports data transfers over both connected and unconnected
endpoints, with the ability to control the send operation per call through the
use of flags. The fi_tsendmsg function takes a struct fi_msg_tagged as input.
before fi_tsend is called.
Unless the endpoint has been configured differently, the data buffer
passed into fi_tsend must not be touched by the application until the
fi_tsend call completes asynchronously.
.SS fi_tsendv
.PP
The fi_tsendv call adds support for a scatter-gather list to fi_tsend.
The fi_tsendv transfers the set of data buffers referenced by the iov
parameter to a remote endpoint as a single message.
.SS fi_tsendmsg
.PP
The fi_tsendmsg call supports data transfers over both connected and
unconnected endpoints, with the ability to control the send operation
per call through the use of flags.
The fi_tsendmsg function takes a struct fi_msg_tagged as input.
.IP
.nf
struct fi_msg_tagged {
const struct iovec *msg_iov; /* scatter-gather array */
void *desc; /* data descriptor */
size_t iov_count;/* # elements in msg_iov *
const void *addr; /* optional endpoint address */
uint64_t tag; /* tag associated with message */
uint64_t ignore; /* mask applied to tag for receives */
void *context; /* user-defined context */
uint64_t data; /* optional immediate data */
\f[C]
struct\ fi_msg_tagged\ {
\ \ \ \ const\ struct\ iovec\ *msg_iov;\ /*\ scatter-gather\ array\ */
\ \ \ \ void\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ *desc;\ \ \ \ /*\ data\ descriptor\ */
\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ \ iov_count;/*\ #\ elements\ in\ msg_iov\ *
\ \ \ \ const\ void\ \ \ \ \ \ \ \ \ *addr;\ \ \ \ /*\ optional\ endpoint\ address\ */
\ \ \ \ uint64_t\ \ \ \ \ \ \ \ \ \ \ tag;\ \ \ \ \ \ /*\ tag\ associated\ with\ message\ */
\ \ \ \ uint64_t\ \ \ \ \ \ \ \ \ \ \ ignore;\ \ \ /*\ mask\ applied\ to\ tag\ for\ receives\ */
\ \ \ \ void\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ *context;\ /*\ user-defined\ context\ */
\ \ \ \ uint64_t\ \ \ \ \ \ \ \ \ \ \ data;\ \ \ \ \ /*\ optional\ immediate\ data\ */
};
\f[]
.fi
.SS "fi_tinject"
The tagged inject call is an optimized version of fi_tsend. The
fi_tinject function behaves as if the FI_INJECT transfer flag were
set, and FI_EVENT were not. That is, the data buffer is available for reuse
immediately on returning from fi_tinject, and no completion event will
be generated for this send. The completion event will be suppressed even if
the endpoint has not been configured with FI_EVENT. See the flags
discussion below for more details.
.SS "fi_tinjectto"
This call is similar to fi_tinject, but for unconnected endpoints.
.SS "fi_tsenddata"
The tagged send data call is similar to fi_tsend, but allows for the sending of
remote CQ data (see FI_REMOTE_CQ_DATA flag) as part of the transfer.
.SS "fi_tsenddatato"
This call is similar to fi_tsenddata, but for unconnected endpoints.
.SS "fi_trecv"
The fi_trecv call posts a data buffer to the receive queue of the corresponding
endpoint. Posted receives are matched with inbound sends in the order in which
they were posted. Message boundaries are maintained. The order
in which the receives complete is dependent on the endpoint type and protocol.
.SS "fi_trecvfrom"
The fi_trecvfrom call is equivalent to fi_trecv for unconnected endpoints.
It is used to indicate that a buffer should be posted to receive incoming data
from a specific remote endpoint.
.SS "fi_trecvmsg"
The fi_trecvmsg call supports posting buffers over both connected and unconnected
endpoints, with the ability to control the receive operation per call through the
use of flags. The fi_trecvmsg function takes a struct fi_msg_tagged as input.
.SS "fi_tsearch"
The function fi_tsearch determines if a message with the specified
tag with ignore mask from an optionally supplied source address has been
received and is buffered by the provider. The fi_tsearch call is only
available on endpoints with FI_BUFFERED_RECV enabled. The fi_tsearch
operation may complete asynchronously or immediately, depending on the
underlying provider implementation.
.SS fi_tinject
.PP
The tagged inject call is an optimized version of fi_tsend.
The fi_tinject function behaves as if the FI_INJECT transfer flag were
set, and FI_COMPLETION were not.
That is, the data buffer is available for reuse immediately on returning
from fi_tinject, and no completion event will be generated for this
send.
The completion event will be suppressed even if the endpoint has not
been configured with FI_COMPLETION.
See the flags discussion below for more details.
.SS fi_tsenddata
.PP
The tagged send data call is similar to fi_tsend, but allows for the
sending of remote CQ data (see FI_REMOTE_CQ_DATA flag) as part of the
transfer.
.SS fi_trecv
.PP
The fi_trecv call posts a data buffer to the receive queue of the
corresponding endpoint.
Posted receives are searched in the order in which they were posted in
order to match sends.
Message boundaries are maintained.
The order in which the receives complete is dependent on the endpoint
type and protocol.
.SS fi_trecvv
.PP
The fi_trecvv call adds support for a scatter-gather list to fi_trecv.
The fi_trecvv posts the set of data buffers referenced by the iov
parameter to receive incoming data.
.SS fi_trecvmsg
.PP
The fi_trecvmsg call supports posting buffers over both connected and
unconnected endpoints, with the ability to control the receive operation
per call through the use of flags.
The fi_trecvmsg function takes a struct fi_msg_tagged as input.
.SS fi_tsearch
.PP
The function fi_tsearch determines if a message with the specified tag
with ignore mask from an optionally supplied source address has been
received and is buffered by the provider.
The fi_tsearch call is only available on endpoints with FI_BUFFERED_RECV
enabled.
The fi_tsearch operation may complete asynchronously or immediately,
depending on the underlying provider implementation.
.PP
By default, a single message may be matched by multiple search
operations.
The user can restrict a message to matching with a single fi_tsearch
call by using the FI_CLAIM flag to control the search.
When set, FI_CLAIM indicates that when a search successfully finds a
matching message, the message is claimed by caller.
Subsequent searches cannot find the same message, although they may
match other messages that have the same tag.
.SH FLAGS
.PP
By default, a single message may be matched by multiple search operations.
The user can restrict a message to matching with a single fi_tsearch call
by using the FI_CLAIM flag to control the search. When set, FI_CLAIM
indicates that when a search successfully finds a matching message, the
message is claimed by caller. Subsequent searches cannot find the same
message, although they may match other messages that have the same tag.
.SH "FLAGS"
The fi_trecvmsg and fi_tsendmsg calls allow the user to specify flags
which can change the default message handling of the endpoint.
Flags specified with fi_trecvmsg / fi_tsendmsg override most flags
previously configured with the endpoint, except where noted (see fi_endpoint).
The following list of flags are usable with fi_trecvmsg and/or fi_tsendmsg.
.IP "FI_REMOTE_CQ_DATA"
Applies to fi_tsendmsg, fi_tsenddata, and fi_tsenddatato.
Indicates that remote CQ data is available and should
be sent as part of the request. See fi_getinfo
for additional details on FI_REMOTE_CQ_DATA.
.IP "FI_EVENT"
Indicates that a completion entry should be generated for the specified
operation. The endpoint must be bound to an event queue
with FI_EVENT that corresponds to the specified operation, or this flag
is ignored.
.IP "FI_MORE"
Indicates that the user has additional requests that will immediately be
posted after the current call returns. Use of this flag may improve
performance by enabling the provider to optimize its access to the fabric
hardware.
.IP "FI_REMOTE_SIGNAL"
Indicates that a completion event at the target process should be generated
for the given operation. The remote endpoint must be configured with
FI_REMOTE_SIGNAL, or this flag will be ignored by the target.
.IP "FI_INJECT"
Applies to fi_tsendmsg. Indicates that the outbound data buffer should be
returned to user immediately after the send call returns, even if the operation
is handled asynchronously. This may require that the underlying provider
implementation copy the data into a local buffer and transfer out of that
buffer.
.IP "FI_REMOTE_COMPLETE"
Applies to fi_tsendmsg. Indicates that a completion should not be generated
until the operation has completed on the remote side.
previously configured with the endpoint, except where noted (see
fi_endpoint).
The following list of flags are usable with fi_trecvmsg and/or
fi_tsendmsg.
.PP
\f[I]FI_REMOTE_CQ_DATA\f[] : Applies to fi_tsendmsg and fi_tsenddata.
Indicates that remote CQ data is available and should be sent as part of
the request.
See fi_getinfo for additional details on FI_REMOTE_CQ_DATA.
.PP
\f[I]FI_COMPLETION\f[] : Indicates that a completion entry should be
generated for the specified operation.
The endpoint must be bound to an event queue with FI_COMPLETION that
corresponds to the specified operation, or this flag is ignored.
.PP
\f[I]FI_MORE\f[] : Indicates that the user has additional requests that
will immediately be posted after the current call returns.
Use of this flag may improve performance by enabling the provider to
optimize its access to the fabric hardware.
.PP
\f[I]FI_REMOTE_SIGNAL\f[] : Indicates that a completion event at the
target process should be generated for the given operation.
The remote endpoint must be configured with FI_REMOTE_SIGNAL, or this
flag will be ignored by the target.
.PP
\f[I]FI_INJECT\f[] : Applies to fi_tsendmsg.
Indicates that the outbound data buffer should be returned to user
immediately after the send call returns, even if the operation is
handled asynchronously.
This may require that the underlying provider implementation copy the
data into a local buffer and transfer out of that buffer.
.PP
\f[I]FI_REMOTE_COMPLETE\f[] : Applies to fi_tsendmsg.
Indicates that a completion should not be generated until the operation
has completed on the remote side.
.PP
The following flags may be used with fi_tsearch.
.IP "FI_CLAIM"
Indicates that when a search successfully finds a matching message, the
message is claimed by caller. Subsequent searches cannot find the same
message, although they may match other messages that have the same tag.
.SH "RETURN VALUE"
The tagged send and receive calls return 0 on success.
On error, a negative value corresponding to fabric
.I errno
is returned. Fabric errno values are defined in
.IR "fi_errno.h".
.PP
The fi_tsearch calls returns 0 if the search was successfully
initiated asynchronously. In this case, the result of the search
will be reported through the event collector associated with the endpoint.
\f[I]FI_CLAIM\f[] : Indicates that when a search successfully finds a
matching message, the message is claimed by caller.
Subsequent searches cannot find the same message, although they may
match other messages that have the same tag.
.SH RETURN VALUE
.PP
The tagged send and receive calls return 0 on success.
On error, a negative value corresponding to fabric \f[I]errno\f[] is returned.
Fabric errno values are defined in \f[C]fi_errno.h\f[].
.PP
The fi_tsearch calls returns 0 if the search was successfully initiated
asynchronously.
In this case, the result of the search will be reported through the
event collector associated with the endpoint.
If the search completes immediately, fi_tsearch will return 1, with
information about the matching receive returned through the len, tag,
src_addr, and src_addrlen parameters.
.SH "ERRORS"
.IP "-FI_ENOMSG"
Returned by fi_tsearch on an immediate completion, but no matching message
was located.
.IP "-FI_EAGAIN"
Indicates that the underlying provider currently lacks the resources needed
to initiate the requested operation. This may be the result of insufficient
internal buffering, in the case of FI_SEND_BUFFERED, or processing queues
are full. The operation may be retried after additional provider resources
become available, usually through the completion of currently outstanding
operations.
.IP "-FI_EINVAL"
Indicates that an invalid argument was supplied by the user.
.IP "-FI_EOTHER"
Indicates that an unspecified error occurred.
.SH "NOTES"
.SS Any source
The function fi_trecvfrom() may be used to receive a message from a specific
source address. If the user wishes to receive a message from any source on
an unconnected fabric endpoint the function fi_recv() may be used.
.SS Ordering
The order in which tags are matched is only defined for a pair of sending and
receiving endpoints. The ordering is defined by the underlying protocol.
If a specific protocol is not selected for an endpoint, the libfabric
implementation will choose a protocol that satisfies the following requirement
from the MPI-3.0 specification (page 41, lines 1-5).
.SH ERRORS
.PP
\*(lqIf a sender sends two messages in succession to the same destination,
\f[I]-FI_ENOMSG\f[] : Returned by fi_tsearch on an immediate completion,
but no matching message was located.
.PP
\f[I]-FI_EAGAIN\f[] : Indicates that the underlying provider currently
lacks the resources needed to initiate the requested operation.
This may be the result of insufficient internal buffering, in the case
of FI_INJECT, or processing queues are full.
The operation may be retried after additional provider resources become
available, usually through the completion of currently outstanding
operations.
.PP
\f[I]-FI_EINVAL\f[] : Indicates that an invalid argument was supplied by
the user.
.PP
\f[I]-FI_EOTHER\f[] : Indicates that an unspecified error occurred.
.SH NOTES
.SS Any source
.PP
The function fi_trecv() may be used to receive a message from a specific
source address.
If the user wishes to receive a message from any source on an
unconnected fabric endpoint the function fi_recv() may be used, or
fi_trecv() may be used with the src_addr set to a wildcard address that
has been inserted into an address vector.
See fi_av.3 for more details.
.SS Ordering
.PP
The order in which tags are matched is only defined for a pair of
sending and receiving endpoints.
The ordering is defined by the underlying protocol.
If a specific protocol is not selected for an endpoint, the libfabric
implementation will choose a protocol that satisfies the following
requirement from the MPI-3.0 specification (page 41, lines 1-5):
.RS
.PP
If a sender sends two messages in succession to the same destination,
and both match the same receive, then this operation cannot receive the
second message if the first one is still pending. If a receiver posts two
receives in succession, and both match the same message, then the second
receive operation cannot be satisfied by this message, if the first one is
still pending.\*(rq
.SH "SEE ALSO"
fi_getinfo(3), fi_endpoint(3), fi_domain(3), fi_eq(3)
second message if the first one is still pending.
If a receiver posts two receives in succession, and both match the same
message, then the second receive operation cannot be satisfied by this
message, if the first one is still pending.
.RE
.SH SEE ALSO
.PP
\f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3),
\f[C]fi_eq\f[](3)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -1 +0,0 @@
.so man3/fi_tagged.3

Просмотреть файл

@ -1 +0,0 @@
.so man3/fi_tagged.3

Просмотреть файл

@ -1,67 +1,89 @@
.TH "FI_TRIGGER" 3 "2014-11-11" "Libfabric v0.0.2" "Libfabric Programmer's Manual" libfabric
.TH fi_trigger 3 "2014-11-14" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_trigger - Triggered operations
.SH SYNOPSIS
.B #include <rdma/fi_trigger.h>
.SH "DESCRIPTION"
Triggered operations allow an application to queue a data transfer request
that is deferred until a specified condition is met. It is often used to
send a message, but only after receiving all input data.
.IP
.nf
\f[C]
#include\ <rdma/fi_trigger.h>
\f[]
.fi
.SH DESCRIPTION
.PP
Triggered operations allow an application to queue a data transfer
request that is deferred until a specified condition is met.
It is often used to send a message, but only after receiving all input
data.
.PP
A triggered operation may be requested by specifying the FI_TRIGGER flag
as part of the operation. Alternatively, an endpoint alias may be created
and configured with the FI_TRIGGER flag. Such an endpoint is referred to
as a triggerable endpoint. All data transfer operations on a triggerable
endpoint are deferred.
as part of the operation.
Alternatively, an endpoint alias may be created and configured with the
FI_TRIGGER flag.
Such an endpoint is referred to as a triggerable endpoint.
All data transfer operations on a triggerable endpoint are deferred.
.PP
Any data transfer operation is potentially triggerable, subject to
provider constraints. Triggerable endpoints are initialized such that
only those interfaces supported by the provider which are triggerable
are available.
provider constraints.
Triggerable endpoints are initialized such that only those interfaces
supported by the provider which are triggerable are available.
.PP
Triggered operations require that applications use struct fi_triggered_context
as their per operation context parameter. The use of struct
fi_triggered_context replaces struct fi_context, if required by the
provider. Although struct fi_triggered_context is not opaque to the
application, the contents of the structure may be modified by the provider.
This structure has similar requirements as struct fi_context. It must be
allocated by the application and remain valid until the corresponding
operation completes or is successfully canceled.
Triggered operations require that applications use struct
fi_triggered_context as their per operation context parameter.
The use of struct fi_triggered_context replaces struct fi_context, if
required by the provider.
Although struct fi_triggered_context is not opaque to the application,
the contents of the structure may be modified by the provider.
This structure has similar requirements as struct fi_context.
It must be allocated by the application and remain valid until the
corresponding operation completes or is successfully canceled.
.PP
Struct fi_triggered_context is used to specify the condition that must be
met before the triggered data transfer is initiated. If the condition
is met when the request is made, then the data transfer may be initiated
immediately. The format of struct fi_triggered_context is described below.
Struct fi_triggered_context is used to specify the condition that must
be met before the triggered data transfer is initiated.
If the condition is met when the request is made, then the data transfer
may be initiated immediately.
The format of struct fi_triggered_context is described below.
.IP
.nf
struct fi_triggered_context {
enum fi_trigger_event event_type; /* trigger type */
union {
struct fi_trigger_threshold threshold;
void *internal[3]; /* reserved */
};
\f[C]
struct\ fi_triggered_context\ {
\ \ \ \ enum\ fi_trigger_event\ \ \ event_type;\ \ \ /*\ trigger\ type\ */
\ \ \ \ union\ {
\ \ \ \ \ \ \ \ struct\ fi_trigger_threshold\ threshold;
\ \ \ \ \ \ \ \ void\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ *internal[3];\ /*\ reserved\ */
\ \ \ \ };
};
\f[]
.fi
The triggered context indicates the type of event assigned to the trigger,
along with a union of trigger details that is based on the event type.
.SS "TRIGGER EVENTS"
.PP
The triggered context indicates the type of event assigned to the
trigger, along with a union of trigger details that is based on the
event type.
.SS TRIGGER EVENTS
.PP
The following trigger events are defined.
.IP "FI_TRIGGER_THRESHOLD
This indicates that the data transfer operation will be deferred until an
event counter crosses an application specified threshold value. The
threshold is specified using struct fi_trigger_threshold:
.PP
\f[I]FI_TRIGGER_THRESHOLD\f[] : This indicates that the data transfer
operation will be deferred until an event counter crosses an application
specified threshold value.
The threshold is specified using struct fi_trigger_threshold:
.IP
.nf
struct fi_trigger_threshold {
struct fid_cntr *cntr; /* event counter to check */
size_t threshold; /* threshold value */
\f[C]
struct\ fi_trigger_threshold\ {
\ \ \ \ struct\ fid_cntr\ *cntr;\ /*\ event\ counter\ to\ check\ */
\ \ \ \ size_t\ threshold;\ \ \ \ \ \ /*\ threshold\ value\ */
};
\f[]
.fi
.PP
Threshold operations are triggered in the order of the threshold values.
This is true even if the counter increments by a value greater than 1. If
two triggered operations have the same threshold, they will be triggered in
the order in which they were submitted to the endpoint.
.SH "SEE ALSO"
fi_getinfo(3), fi_endpoint(3), fi_alias(3), fi_cntr(3)
This is true even if the counter increments by a value greater than 1.
If two triggered operations have the same threshold, they will be
triggered in the order in which they were submitted to the endpoint.
.SH SEE ALSO
.PP
\f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_alias\f[](3),
\f[C]fi_cntr\f[](3)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -1 +0,0 @@
.so man3/fi_tagged.3

Просмотреть файл

@ -1 +0,0 @@
.so man3/fi_tagged.3

Просмотреть файл

@ -1,23 +1,36 @@
.TH "FI_VERSION" 3 "2014-11-11" "Libfabric v0.0.2" "Libfabric Programmer's Manual" libfabric
.TH fi_version 3 "2014-11-14" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
fi_version \- Version of the library interfaces
.PP
fi_version - Version of the library interfaces
.SH SYNOPSIS
.B "#include <rdma/fabric.h>"
.HP
.BI "uint32_t fi_version();"
.HP
.BI "FI_MAJOR(version)"
.HP
.BI "FI_MINOR(version)"
.SH "DESCRIPTION"
This call returns the current version of the library interfaces. The
version includes major and a minor numbers. These may be extracted from
the returned value using the FI_MAJOR() and FI_MINOR() macros.
.SH "NOTES"
.IP
.nf
\f[C]
#include\ <rdma/fabric.h>
uint32_t\ fi_version();
FI_MAJOR(version)
FI_MINOR(version)
\f[]
.fi
.SH DESCRIPTION
.PP
This call returns the current version of the library interfaces.
The version includes major and a minor numbers.
These may be extracted from the returned value using the FI_MAJOR() and
FI_MINOR() macros.
.SH NOTES
.PP
The library may support older versions of the interfaces.
.SH "RETURN VALUE"
Returns the current library version. The upper 16-bits of the version
correspond to the major number, and the lower 16-bits correspond with the
minor number.
.SH "SEE ALSO"
fabric(7), fi_getinfo(3)
.SH RETURN VALUE
.PP
Returns the current library version.
The upper 16-bits of the version correspond to the major number, and the
lower 16-bits correspond with the minor number.
.SH SEE ALSO
.PP
\f[C]fi_fabric\f[](7), \f[C]fi_getinfo\f[](3)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -262,10 +262,10 @@ struct psmx_fid_domain {
psm_mq_t psm_mq;
pthread_t ns_thread;
int ns_port;
int tagged_used:1;
int msg_used:1;
int rma_used:1;
int atomics_used:1;
struct psmx_fid_ep *tagged_ep;
struct psmx_fid_ep *msg_ep;
struct psmx_fid_ep *rma_ep;
struct psmx_fid_ep *atomics_ep;
uint64_t mode;
int am_initialized;
@ -316,6 +316,7 @@ struct psmx_cq_event_queue {
struct psmx_fid_wait {
struct fid_wait wait;
struct psmx_fid_domain *domain;
int type;
union {
int fd[2];
@ -333,6 +334,7 @@ struct psmx_poll_list {
struct psmx_fid_poll {
struct fid_poll poll;
struct psmx_fid_domain *domain;
struct dlist_entry poll_list_head;
};
@ -483,6 +485,8 @@ struct psmx_fid_cntr {
uint64_t flags;
volatile uint64_t counter;
volatile uint64_t error_counter;
uint64_t counter_last_read;
uint64_t error_counter_last_read;
struct psmx_fid_wait *wait;
struct psmx_trigger *trigger;
pthread_mutex_t trigger_lock;
@ -509,13 +513,12 @@ struct psmx_fid_ep {
struct psmx_fid_cntr *recv_cntr;
struct psmx_fid_cntr *write_cntr;
struct psmx_fid_cntr *read_cntr;
struct psmx_fid_cntr *remote_write_cntr;
struct psmx_fid_cntr *remote_read_cntr;
int send_cq_event_flag:1;
int recv_cq_event_flag:1;
int send_cntr_event_flag:1;
int recv_cntr_event_flag:1;
int write_cntr_event_flag:1;
int read_cntr_event_flag:1;
uint64_t flags;
uint64_t caps;
int connected;
psm_epid_t peer_psm_epid;
psm_epaddr_t peer_psm_epaddr;
@ -523,10 +526,6 @@ struct psmx_fid_ep {
struct fi_context nocomp_recv_context;
struct fi_context sendimm_context;
struct fi_context writeimm_context;
uint64_t pending_sends;
uint64_t pending_writes;
uint64_t pending_reads;
uint64_t pending_atomics;
size_t min_multi_recv;
};
@ -590,7 +589,8 @@ int psmx_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr,
struct fid_poll **pollset);
int psmx_domain_check_features(struct psmx_fid_domain *domain, int ep_cap);
int psmx_domain_enable_features(struct psmx_fid_domain *domain, int ep_cap);
int psmx_domain_enable_ep(struct psmx_fid_domain *domain, struct psmx_fid_ep *ep);
void psmx_domain_disable_ep(struct psmx_fid_domain *domain, struct psmx_fid_ep *ep);
void *psmx_name_server(void *args);
void *psmx_resolve_name(const char *servername, int port);
void psmx_get_uuid(psm_uuid_t uuid);
@ -651,53 +651,53 @@ static inline void psmx_cntr_inc(struct psmx_fid_cntr *cntr)
psmx_wait_signal((struct fid_wait *)cntr->wait);
}
ssize_t _psmx_sendto(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, void *context,
uint64_t flags);
ssize_t _psmx_recvfrom(struct fid_ep *ep, void *buf, size_t len,
void *desc, fi_addr_t src_addr, void *context,
uint64_t flags);
ssize_t _psmx_tagged_sendto(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, uint64_t tag,
void *context, uint64_t flags);
ssize_t _psmx_tagged_recvfrom(struct fid_ep *ep, void *buf, size_t len,
void *desc, fi_addr_t src_addr, uint64_t tag,
uint64_t ignore, void *context, uint64_t flags);
ssize_t _psmx_writeto(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr,
uint64_t addr, uint64_t key, void *context,
uint64_t flags, uint64_t data);
ssize_t _psmx_readfrom(struct fid_ep *ep, void *buf, size_t len,
void *desc, fi_addr_t src_addr,
uint64_t addr, uint64_t key, void *context,
uint64_t flags);
ssize_t _psmx_atomic_writeto(struct fid_ep *ep,
const void *buf,
size_t count, void *desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype,
enum fi_op op, void *context,
uint64_t flags);
ssize_t _psmx_atomic_readwriteto(struct fid_ep *ep,
const void *buf,
size_t count, void *desc,
void *result, void *result_desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype,
enum fi_op op, void *context,
uint64_t flags);
ssize_t _psmx_atomic_compwriteto(struct fid_ep *ep,
const void *buf,
size_t count, void *desc,
const void *compare, void *compare_desc,
void *result, void *result_desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype,
enum fi_op op, void *context,
uint64_t flags);
ssize_t _psmx_send(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, void *context,
uint64_t flags);
ssize_t _psmx_recv(struct fid_ep *ep, void *buf, size_t len,
void *desc, fi_addr_t src_addr, void *context,
uint64_t flags);
ssize_t _psmx_tagged_send(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, uint64_t tag,
void *context, uint64_t flags);
ssize_t _psmx_tagged_recv(struct fid_ep *ep, void *buf, size_t len,
void *desc, fi_addr_t src_addr, uint64_t tag,
uint64_t ignore, void *context, uint64_t flags);
ssize_t _psmx_write(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr,
uint64_t addr, uint64_t key, void *context,
uint64_t flags, uint64_t data);
ssize_t _psmx_read(struct fid_ep *ep, void *buf, size_t len,
void *desc, fi_addr_t src_addr,
uint64_t addr, uint64_t key, void *context,
uint64_t flags);
ssize_t _psmx_atomic_write(struct fid_ep *ep,
const void *buf,
size_t count, void *desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype,
enum fi_op op, void *context,
uint64_t flags);
ssize_t _psmx_atomic_readwrite(struct fid_ep *ep,
const void *buf,
size_t count, void *desc,
void *result, void *result_desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype,
enum fi_op op, void *context,
uint64_t flags);
ssize_t _psmx_atomic_compwrite(struct fid_ep *ep,
const void *buf,
size_t count, void *desc,
const void *compare, void *compare_desc,
void *result, void *result_desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype,
enum fi_op op, void *context,
uint64_t flags);
#ifdef __cplusplus
}

Просмотреть файл

@ -396,6 +396,7 @@ int psmx_am_atomic_handler(psm_am_token_t token, psm_epaddr_t epaddr,
struct psmx_am_request *req;
struct psmx_cq_event *event;
struct psmx_fid_mr *mr;
struct psmx_fid_ep *target_ep;
void *tmp_buf;
switch (args[0].u32w0 & PSMX_AM_OP_MASK) {
@ -434,6 +435,10 @@ int psmx_am_atomic_handler(psm_am_token_t token, psm_epaddr_t epaddr,
}
if (mr->cntr)
psmx_cntr_inc(mr->cntr);
target_ep = mr->domain->atomics_ep;
if (target_ep->remote_write_cntr)
psmx_cntr_inc(target_ep->remote_write_cntr);
}
rep_args[0].u32w0 = PSMX_AM_REP_ATOMIC_WRITE;
@ -465,25 +470,44 @@ int psmx_am_atomic_handler(psm_am_token_t token, psm_epaddr_t epaddr,
datatype, op, count);
else
err = -ENOMEM;
if (mr->cq) {
event = psmx_cq_create_event(
mr->cq,
0, /* context */
addr,
0, /* flags */
len,
0, /* data */
0, /* tag */
0, /* olen */
0 /* err */);
if (op != FI_ATOMIC_READ) {
if (mr->cq) {
event = psmx_cq_create_event(
mr->cq,
0, /* context */
addr,
0, /* flags */
len,
0, /* data */
0, /* tag */
0, /* olen */
0 /* err */);
if (event)
psmx_cq_enqueue_event(mr->cq, event);
else
err = -ENOMEM;
if (event)
psmx_cq_enqueue_event(mr->cq, event);
else
err = -ENOMEM;
}
if (mr->cntr)
psmx_cntr_inc(mr->cntr);
}
target_ep = mr->domain->atomics_ep;
if (op == FI_ATOMIC_WRITE) {
if (target_ep->remote_write_cntr)
psmx_cntr_inc(target_ep->remote_write_cntr);
}
else if (op == FI_ATOMIC_READ) {
if (target_ep->remote_read_cntr)
psmx_cntr_inc(target_ep->remote_read_cntr);
}
else {
if (target_ep->remote_write_cntr)
psmx_cntr_inc(target_ep->remote_write_cntr);
if (target_ep->remote_read_cntr &&
target_ep->remote_read_cntr != target_ep->remote_write_cntr)
psmx_cntr_inc(target_ep->remote_read_cntr);
}
if (mr->cntr)
psmx_cntr_inc(mr->cntr);
}
else {
tmp_buf = NULL;
@ -538,6 +562,13 @@ int psmx_am_atomic_handler(psm_am_token_t token, psm_epaddr_t epaddr,
}
if (mr->cntr)
psmx_cntr_inc(mr->cntr);
target_ep = mr->domain->atomics_ep;
if (target_ep->remote_write_cntr)
psmx_cntr_inc(target_ep->remote_write_cntr);
if (target_ep->remote_read_cntr &&
target_ep->remote_read_cntr != target_ep->remote_write_cntr)
psmx_cntr_inc(target_ep->remote_read_cntr);
}
else {
tmp_buf = NULL;
@ -572,11 +603,9 @@ int psmx_am_atomic_handler(psm_am_token_t token, psm_epaddr_t epaddr,
err = -ENOMEM;
}
if (req->ep->write_cntr &&
!(req->ep->write_cntr_event_flag && req->no_event))
if (req->ep->write_cntr)
psmx_cntr_inc(req->ep->write_cntr);
req->ep->pending_atomics--;
free(req);
break;
@ -606,11 +635,9 @@ int psmx_am_atomic_handler(psm_am_token_t token, psm_epaddr_t epaddr,
err = -ENOMEM;
}
if (req->ep->read_cntr &&
!(req->ep->read_cntr_event_flag && req->no_event))
if (req->ep->read_cntr)
psmx_cntr_inc(req->ep->read_cntr);
req->ep->pending_atomics--;
free(req);
break;
@ -633,14 +660,13 @@ static int psmx_atomic_self(int am_cmd,
{
struct psmx_fid_mr *mr;
struct psmx_cq_event *event;
struct psmx_fid_ep *target_ep;
size_t len;
int no_event;
int err = 0;
int op_error;
int access;
ep->pending_atomics++;
if (am_cmd == PSMX_AM_REQ_ATOMIC_WRITE)
access = FI_REMOTE_WRITE;
else
@ -673,25 +699,46 @@ static int psmx_atomic_self(int am_cmd,
(int)datatype, (int)op, (int)count);
break;
}
if (mr->cq) {
event = psmx_cq_create_event(
mr->cq,
0, /* context */
(void *)addr,
0, /* flags */
len,
0, /* data */
0, /* tag */
0, /* olen */
0 /* err */);
if (event)
psmx_cq_enqueue_event(mr->cq, event);
else
err = -ENOMEM;
if (op != FI_ATOMIC_READ) {
if (mr->cq) {
event = psmx_cq_create_event(
mr->cq,
0, /* context */
(void *)addr,
0, /* flags */
len,
0, /* data */
0, /* tag */
0, /* olen */
0 /* err */);
if (event)
psmx_cq_enqueue_event(mr->cq, event);
else
err = -ENOMEM;
}
if (mr->cntr)
psmx_cntr_inc(mr->cntr);
}
target_ep = mr->domain->atomics_ep;
if (op == FI_ATOMIC_WRITE) {
if (target_ep->remote_write_cntr)
psmx_cntr_inc(target_ep->remote_write_cntr);
}
else if (op == FI_ATOMIC_READ) {
if (target_ep->remote_read_cntr)
psmx_cntr_inc(target_ep->remote_read_cntr);
}
else {
if (target_ep->remote_write_cntr)
psmx_cntr_inc(target_ep->remote_write_cntr);
if (am_cmd != PSMX_AM_REQ_ATOMIC_WRITE &&
target_ep->remote_read_cntr &&
target_ep->remote_read_cntr != target_ep->remote_write_cntr)
psmx_cntr_inc(target_ep->remote_read_cntr);
}
if (mr->cntr)
psmx_cntr_inc(mr->cntr);
gen_local_event:
no_event = ((flags & FI_INJECT) ||
@ -715,31 +762,27 @@ gen_local_event:
switch (am_cmd) {
case PSMX_AM_REQ_ATOMIC_WRITE:
if (ep->write_cntr &&
!(ep->write_cntr_event_flag && no_event))
if (ep->write_cntr)
psmx_cntr_inc(ep->write_cntr);
break;
case PSMX_AM_REQ_ATOMIC_READWRITE:
case PSMX_AM_REQ_ATOMIC_COMPWRITE:
if (ep->read_cntr &&
!(ep->read_cntr_event_flag && no_event))
if (ep->read_cntr)
psmx_cntr_inc(ep->read_cntr);
break;
}
ep->pending_atomics--;
return err;
}
ssize_t _psmx_atomic_writeto(struct fid_ep *ep,
const void *buf,
size_t count, void *desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype,
enum fi_op op, void *context,
uint64_t flags)
ssize_t _psmx_atomic_write(struct fid_ep *ep,
const void *buf,
size_t count, void *desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype,
enum fi_op op, void *context,
uint64_t flags)
{
struct psmx_fid_ep *ep_priv;
struct psmx_fid_av *av;
@ -751,6 +794,11 @@ ssize_t _psmx_atomic_writeto(struct fid_ep *ep,
int err;
size_t idx;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (ep_priv->connected)
dest_addr = (fi_addr_t) ep_priv->peer_psm_epaddr;
if (flags & FI_TRIGGER) {
struct psmx_trigger *trigger;
struct fi_triggered_context *ctxt = context;
@ -779,9 +827,6 @@ ssize_t _psmx_atomic_writeto(struct fid_ep *ep,
return 0;
}
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (!buf)
return -EINVAL;
@ -851,12 +896,10 @@ ssize_t _psmx_atomic_writeto(struct fid_ep *ep,
PSMX_AM_ATOMIC_HANDLER, args, 5,
(void *)buf, len, am_flags, NULL, NULL);
ep_priv->pending_atomics++;
return 0;
}
static ssize_t psmx_atomic_writeto(struct fid_ep *ep,
static ssize_t psmx_atomic_write(struct fid_ep *ep,
const void *buf,
size_t count, void *desc,
fi_addr_t dest_addr,
@ -867,9 +910,9 @@ static ssize_t psmx_atomic_writeto(struct fid_ep *ep,
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
return _psmx_atomic_writeto(ep, buf, count, desc,
dest_addr, addr, key,
datatype, op, context, ep_priv->flags);
return _psmx_atomic_write(ep, buf, count, desc,
dest_addr, addr, key,
datatype, op, context, ep_priv->flags);
}
static ssize_t psmx_atomic_writemsg(struct fid_ep *ep,
@ -879,37 +922,18 @@ static ssize_t psmx_atomic_writemsg(struct fid_ep *ep,
if (!msg || msg->iov_count != 1)
return -EINVAL;
return _psmx_atomic_writeto(ep, msg->msg_iov[0].addr,
msg->msg_iov[0].count,
msg->desc ? msg->desc[0] : NULL,
msg->addr, msg->rma_iov[0].addr,
msg->rma_iov[0].key, msg->datatype,
msg->op, msg->context, flags);
}
static ssize_t psmx_atomic_write(struct fid_ep *ep,
const void *buf,
size_t count, void *desc,
uint64_t addr, uint64_t key,
enum fi_datatype datatype,
enum fi_op op, void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (!ep_priv->connected)
return -ENOTCONN;
return psmx_atomic_writeto(ep, buf, count, desc,
(fi_addr_t) ep_priv->peer_psm_epaddr, addr, key,
datatype, op, context);
return _psmx_atomic_write(ep, msg->msg_iov[0].addr,
msg->msg_iov[0].count,
msg->desc ? msg->desc[0] : NULL,
msg->addr, msg->rma_iov[0].addr,
msg->rma_iov[0].key, msg->datatype,
msg->op, msg->context, flags);
}
static ssize_t psmx_atomic_writev(struct fid_ep *ep,
const struct fi_ioc *iov,
void **desc, size_t count,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype,
enum fi_op op, void *context)
@ -918,11 +942,11 @@ static ssize_t psmx_atomic_writev(struct fid_ep *ep,
return -EINVAL;
return psmx_atomic_write(ep, iov->addr, iov->count,
desc ? desc[0] : NULL, addr, key,
desc ? desc[0] : NULL, dest_addr, addr, key,
datatype, op, context);
}
static ssize_t psmx_atomic_injectto(struct fid_ep *ep,
static ssize_t psmx_atomic_inject(struct fid_ep *ep,
const void *buf,
size_t count, /*void *desc,*/
fi_addr_t dest_addr,
@ -933,40 +957,20 @@ static ssize_t psmx_atomic_injectto(struct fid_ep *ep,
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
return _psmx_atomic_writeto(ep, buf, count, NULL/*desc*/,
dest_addr, addr, key,
datatype, op, NULL, ep_priv->flags | FI_INJECT);
return _psmx_atomic_write(ep, buf, count, NULL/*desc*/,
dest_addr, addr, key,
datatype, op, NULL, ep_priv->flags | FI_INJECT);
}
static ssize_t psmx_atomic_inject(struct fid_ep *ep,
const void *buf,
size_t count, /*void *desc,*/
uint64_t addr, uint64_t key,
enum fi_datatype datatype,
enum fi_op op)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (!ep_priv->connected)
return -ENOTCONN;
return psmx_atomic_injectto(ep, buf, count, /*desc,*/
(fi_addr_t) ep_priv->peer_psm_epaddr, addr, key,
datatype, op);
}
ssize_t _psmx_atomic_readwriteto(struct fid_ep *ep,
const void *buf,
size_t count, void *desc,
void *result, void *result_desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype,
enum fi_op op, void *context,
uint64_t flags)
ssize_t _psmx_atomic_readwrite(struct fid_ep *ep,
const void *buf,
size_t count, void *desc,
void *result, void *result_desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype,
enum fi_op op, void *context,
uint64_t flags)
{
struct psmx_fid_ep *ep_priv;
struct psmx_fid_av *av;
@ -978,6 +982,11 @@ ssize_t _psmx_atomic_readwriteto(struct fid_ep *ep,
int err;
size_t idx;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (ep_priv->connected)
dest_addr = (fi_addr_t) ep_priv->peer_psm_epaddr;
if (flags & FI_TRIGGER) {
struct psmx_trigger *trigger;
struct fi_triggered_context *ctxt = context;
@ -1008,9 +1017,6 @@ ssize_t _psmx_atomic_readwriteto(struct fid_ep *ep,
return 0;
}
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (!buf)
return -EINVAL;
@ -1081,12 +1087,10 @@ ssize_t _psmx_atomic_readwriteto(struct fid_ep *ep,
PSMX_AM_ATOMIC_HANDLER, args, 5,
(void *)buf, len, am_flags, NULL, NULL);
ep_priv->pending_atomics++;
return 0;
}
static ssize_t psmx_atomic_readwriteto(struct fid_ep *ep,
static ssize_t psmx_atomic_readwrite(struct fid_ep *ep,
const void *buf,
size_t count, void *desc,
void *result, void *result_desc,
@ -1098,7 +1102,7 @@ static ssize_t psmx_atomic_readwriteto(struct fid_ep *ep,
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
return _psmx_atomic_readwriteto(ep, buf, count, desc,
return _psmx_atomic_readwrite(ep, buf, count, desc,
result, result_desc, dest_addr,
addr, key, datatype, op,
context, ep_priv->flags);
@ -1114,7 +1118,7 @@ static ssize_t psmx_atomic_readwritemsg(struct fid_ep *ep,
if (!msg || msg->iov_count != 1)
return -EINVAL;
return _psmx_atomic_readwriteto(ep, msg->msg_iov[0].addr,
return _psmx_atomic_readwrite(ep, msg->msg_iov[0].addr,
msg->msg_iov[0].count,
msg->desc ? msg->desc[0] : NULL,
resultv[0].addr,
@ -1124,34 +1128,12 @@ static ssize_t psmx_atomic_readwritemsg(struct fid_ep *ep,
msg->op, msg->context, flags);
}
static ssize_t psmx_atomic_readwrite(struct fid_ep *ep,
const void *buf,
size_t count, void *desc,
void *result, void *result_desc,
uint64_t addr, uint64_t key,
enum fi_datatype datatype,
enum fi_op op, void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (!ep_priv->connected)
return -ENOTCONN;
return psmx_atomic_readwriteto(ep, buf, count, desc,
result, result_desc,
(fi_addr_t) ep_priv->peer_psm_epaddr,
addr, key, datatype, op,
context);
}
static ssize_t psmx_atomic_readwritev(struct fid_ep *ep,
const struct fi_ioc *iov,
void **desc, size_t count,
struct fi_ioc *resultv,
void **result_desc, size_t result_count,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype,
enum fi_op op, void *context)
@ -1163,19 +1145,19 @@ static ssize_t psmx_atomic_readwritev(struct fid_ep *ep,
desc ? desc[0] : NULL,
resultv[0].addr,
result_desc ? result_desc[0] : NULL,
addr, key, datatype, op, context);
dest_addr, addr, key, datatype, op, context);
}
ssize_t _psmx_atomic_compwriteto(struct fid_ep *ep,
const void *buf,
size_t count, void *desc,
const void *compare, void *compare_desc,
void *result, void *result_desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype,
enum fi_op op, void *context,
uint64_t flags)
ssize_t _psmx_atomic_compwrite(struct fid_ep *ep,
const void *buf,
size_t count, void *desc,
const void *compare, void *compare_desc,
void *result, void *result_desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype,
enum fi_op op, void *context,
uint64_t flags)
{
struct psmx_fid_ep *ep_priv;
struct psmx_fid_av *av;
@ -1188,6 +1170,11 @@ ssize_t _psmx_atomic_compwriteto(struct fid_ep *ep,
void *tmp_buf = NULL;
size_t idx;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (ep_priv->connected)
dest_addr = (fi_addr_t) ep_priv->peer_psm_epaddr;
if (flags & FI_TRIGGER) {
struct psmx_trigger *trigger;
struct fi_triggered_context *ctxt = context;
@ -1220,9 +1207,6 @@ ssize_t _psmx_atomic_compwriteto(struct fid_ep *ep,
return 0;
}
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (!buf)
return -EINVAL;
@ -1307,12 +1291,10 @@ ssize_t _psmx_atomic_compwriteto(struct fid_ep *ep,
len * 2, am_flags,
psmx_am_atomic_completion, tmp_buf);
ep_priv->pending_atomics++;
return 0;
}
static ssize_t psmx_atomic_compwriteto(struct fid_ep *ep,
static ssize_t psmx_atomic_compwrite(struct fid_ep *ep,
const void *buf,
size_t count, void *desc,
const void *compare, void *compare_desc,
@ -1325,7 +1307,7 @@ static ssize_t psmx_atomic_compwriteto(struct fid_ep *ep,
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
return _psmx_atomic_compwriteto(ep, buf, count, desc,
return _psmx_atomic_compwrite(ep, buf, count, desc,
compare, compare_desc,
result, result_desc,
dest_addr, addr, key,
@ -1345,7 +1327,7 @@ static ssize_t psmx_atomic_compwritemsg(struct fid_ep *ep,
if (!msg || msg->iov_count != 1)
return -EINVAL;
return _psmx_atomic_compwriteto(ep, msg->msg_iov[0].addr,
return _psmx_atomic_compwrite(ep, msg->msg_iov[0].addr,
msg->msg_iov[0].count,
msg->desc ? msg->desc[0] : NULL,
comparev[0].addr,
@ -1357,31 +1339,6 @@ static ssize_t psmx_atomic_compwritemsg(struct fid_ep *ep,
msg->op, msg->context, flags);
}
static ssize_t psmx_atomic_compwrite(struct fid_ep *ep,
const void *buf,
size_t count, void *desc,
const void *compare, void *compare_desc,
void *result, void *result_desc,
uint64_t addr, uint64_t key,
enum fi_datatype datatype,
enum fi_op op, void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (!ep_priv->connected)
return -ENOTCONN;
return psmx_atomic_compwriteto(ep, buf, count, desc,
compare, compare_desc,
result, result_desc,
(fi_addr_t) ep_priv->peer_psm_epaddr,
addr, key, datatype, op,
context);
}
static ssize_t psmx_atomic_compwritev(struct fid_ep *ep,
const struct fi_ioc *iov,
void **desc, size_t count,
@ -1391,6 +1348,7 @@ static ssize_t psmx_atomic_compwritev(struct fid_ep *ep,
struct fi_ioc *resultv,
void **result_desc,
size_t result_count,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype,
enum fi_op op, void *context)
@ -1404,7 +1362,7 @@ static ssize_t psmx_atomic_compwritev(struct fid_ep *ep,
compare_desc ? compare_desc[0] : NULL,
resultv[0].addr,
result_desc ? result_desc[0] : NULL,
addr, key, datatype, op, context);
dest_addr, addr, key, datatype, op, context);
}
static int psmx_atomic_writevalid(struct fid_ep *ep,
@ -1528,17 +1486,13 @@ static int psmx_atomic_compwritevalid(struct fid_ep *ep,
struct fi_ops_atomic psmx_atomic_ops = {
.write = psmx_atomic_write,
.writev = psmx_atomic_writev,
.writeto = psmx_atomic_writeto,
.writemsg = psmx_atomic_writemsg,
.inject = psmx_atomic_inject,
.injectto = psmx_atomic_injectto,
.readwrite = psmx_atomic_readwrite,
.readwritev = psmx_atomic_readwritev,
.readwriteto = psmx_atomic_readwriteto,
.readwritemsg = psmx_atomic_readwritemsg,
.compwrite = psmx_atomic_compwrite,
.compwritev = psmx_atomic_compwritev,
.compwriteto = psmx_atomic_compwriteto,
.compwritemsg = psmx_atomic_compwritemsg,
.writevalid = psmx_atomic_writevalid,
.readwritevalid = psmx_atomic_readwritevalid,

Просмотреть файл

@ -272,7 +272,6 @@ static struct fi_ops psmx_fi_ops = {
.size = sizeof(struct fi_ops),
.close = psmx_av_close,
.bind = fi_no_bind,
.sync = fi_no_sync,
.control = fi_no_control,
.ops_open = fi_no_ops_open,
};

Просмотреть файл

@ -49,115 +49,115 @@ void psmx_cntr_check_trigger(struct psmx_fid_cntr *cntr)
cntr->trigger = trigger->next;
switch (trigger->op) {
case PSMX_TRIGGERED_SEND:
_psmx_sendto(trigger->send.ep,
trigger->send.buf,
trigger->send.len,
trigger->send.desc,
trigger->send.dest_addr,
trigger->send.context,
trigger->send.flags);
_psmx_send(trigger->send.ep,
trigger->send.buf,
trigger->send.len,
trigger->send.desc,
trigger->send.dest_addr,
trigger->send.context,
trigger->send.flags);
break;
case PSMX_TRIGGERED_RECV:
_psmx_recvfrom(trigger->recv.ep,
trigger->recv.buf,
trigger->recv.len,
trigger->recv.desc,
trigger->recv.src_addr,
trigger->recv.context,
trigger->recv.flags);
_psmx_recv(trigger->recv.ep,
trigger->recv.buf,
trigger->recv.len,
trigger->recv.desc,
trigger->recv.src_addr,
trigger->recv.context,
trigger->recv.flags);
break;
case PSMX_TRIGGERED_TSEND:
_psmx_tagged_sendto(trigger->tsend.ep,
trigger->tsend.buf,
trigger->tsend.len,
trigger->tsend.desc,
trigger->tsend.dest_addr,
trigger->tsend.tag,
trigger->tsend.context,
trigger->tsend.flags);
_psmx_tagged_send(trigger->tsend.ep,
trigger->tsend.buf,
trigger->tsend.len,
trigger->tsend.desc,
trigger->tsend.dest_addr,
trigger->tsend.tag,
trigger->tsend.context,
trigger->tsend.flags);
break;
case PSMX_TRIGGERED_TRECV:
_psmx_tagged_recvfrom(trigger->trecv.ep,
trigger->trecv.buf,
trigger->trecv.len,
trigger->trecv.desc,
trigger->trecv.src_addr,
trigger->trecv.tag,
trigger->trecv.ignore,
trigger->trecv.context,
trigger->trecv.flags);
_psmx_tagged_recv(trigger->trecv.ep,
trigger->trecv.buf,
trigger->trecv.len,
trigger->trecv.desc,
trigger->trecv.src_addr,
trigger->trecv.tag,
trigger->trecv.ignore,
trigger->trecv.context,
trigger->trecv.flags);
break;
case PSMX_TRIGGERED_WRITE:
_psmx_writeto(trigger->write.ep,
trigger->write.buf,
trigger->write.len,
trigger->write.desc,
trigger->write.dest_addr,
trigger->write.addr,
trigger->write.key,
trigger->write.context,
trigger->write.flags,
trigger->write.data);
_psmx_write(trigger->write.ep,
trigger->write.buf,
trigger->write.len,
trigger->write.desc,
trigger->write.dest_addr,
trigger->write.addr,
trigger->write.key,
trigger->write.context,
trigger->write.flags,
trigger->write.data);
break;
case PSMX_TRIGGERED_READ:
_psmx_readfrom(trigger->read.ep,
trigger->read.buf,
trigger->read.len,
trigger->read.desc,
trigger->read.src_addr,
trigger->read.addr,
trigger->read.key,
trigger->read.context,
trigger->read.flags);
_psmx_read(trigger->read.ep,
trigger->read.buf,
trigger->read.len,
trigger->read.desc,
trigger->read.src_addr,
trigger->read.addr,
trigger->read.key,
trigger->read.context,
trigger->read.flags);
break;
case PSMX_TRIGGERED_ATOMIC_WRITE:
_psmx_atomic_writeto(trigger->atomic_write.ep,
trigger->atomic_write.buf,
trigger->atomic_write.count,
trigger->atomic_write.desc,
trigger->atomic_write.dest_addr,
trigger->atomic_write.addr,
trigger->atomic_write.key,
trigger->atomic_write.datatype,
trigger->atomic_write.atomic_op,
trigger->atomic_write.context,
trigger->atomic_write.flags);
_psmx_atomic_write(trigger->atomic_write.ep,
trigger->atomic_write.buf,
trigger->atomic_write.count,
trigger->atomic_write.desc,
trigger->atomic_write.dest_addr,
trigger->atomic_write.addr,
trigger->atomic_write.key,
trigger->atomic_write.datatype,
trigger->atomic_write.atomic_op,
trigger->atomic_write.context,
trigger->atomic_write.flags);
break;
case PSMX_TRIGGERED_ATOMIC_READWRITE:
_psmx_atomic_readwriteto(trigger->atomic_readwrite.ep,
trigger->atomic_readwrite.buf,
trigger->atomic_readwrite.count,
trigger->atomic_readwrite.desc,
trigger->atomic_readwrite.result,
trigger->atomic_readwrite.result_desc,
trigger->atomic_readwrite.dest_addr,
trigger->atomic_readwrite.addr,
trigger->atomic_readwrite.key,
trigger->atomic_readwrite.datatype,
trigger->atomic_readwrite.atomic_op,
trigger->atomic_readwrite.context,
trigger->atomic_readwrite.flags);
_psmx_atomic_readwrite(trigger->atomic_readwrite.ep,
trigger->atomic_readwrite.buf,
trigger->atomic_readwrite.count,
trigger->atomic_readwrite.desc,
trigger->atomic_readwrite.result,
trigger->atomic_readwrite.result_desc,
trigger->atomic_readwrite.dest_addr,
trigger->atomic_readwrite.addr,
trigger->atomic_readwrite.key,
trigger->atomic_readwrite.datatype,
trigger->atomic_readwrite.atomic_op,
trigger->atomic_readwrite.context,
trigger->atomic_readwrite.flags);
break;
case PSMX_TRIGGERED_ATOMIC_COMPWRITE:
_psmx_atomic_compwriteto(trigger->atomic_compwrite.ep,
trigger->atomic_compwrite.buf,
trigger->atomic_compwrite.count,
trigger->atomic_compwrite.desc,
trigger->atomic_compwrite.compare,
trigger->atomic_compwrite.compare_desc,
trigger->atomic_compwrite.result,
trigger->atomic_compwrite.result_desc,
trigger->atomic_compwrite.dest_addr,
trigger->atomic_compwrite.addr,
trigger->atomic_compwrite.key,
trigger->atomic_compwrite.datatype,
trigger->atomic_compwrite.atomic_op,
trigger->atomic_compwrite.context,
trigger->atomic_compwrite.flags);
_psmx_atomic_compwrite(trigger->atomic_compwrite.ep,
trigger->atomic_compwrite.buf,
trigger->atomic_compwrite.count,
trigger->atomic_compwrite.desc,
trigger->atomic_compwrite.compare,
trigger->atomic_compwrite.compare_desc,
trigger->atomic_compwrite.result,
trigger->atomic_compwrite.result_desc,
trigger->atomic_compwrite.dest_addr,
trigger->atomic_compwrite.addr,
trigger->atomic_compwrite.key,
trigger->atomic_compwrite.datatype,
trigger->atomic_compwrite.atomic_op,
trigger->atomic_compwrite.context,
trigger->atomic_compwrite.flags);
break;
default:
psmx_debug("%s: %d unsupported op\n", __func__, trigger->op);
@ -199,7 +199,9 @@ static uint64_t psmx_cntr_read(struct fid_cntr *cntr)
cntr_priv = container_of(cntr, struct psmx_fid_cntr, cntr);
return cntr_priv->counter;
cntr_priv->counter_last_read = cntr_priv->counter;
return cntr_priv->counter_last_read;
}
static uint64_t psmx_cntr_readerr(struct fid_cntr *cntr)
@ -208,7 +210,9 @@ static uint64_t psmx_cntr_readerr(struct fid_cntr *cntr)
cntr_priv = container_of(cntr, struct psmx_fid_cntr, cntr);
return cntr_priv->error_counter;
cntr_priv->error_counter_last_read = cntr_priv->error_counter;
return cntr_priv->error_counter_last_read;
}
static int psmx_cntr_add(struct fid_cntr *cntr, uint64_t value)
@ -267,6 +271,9 @@ static int psmx_cntr_wait(struct fid_cntr *cntr, uint64_t threshold, int timeout
if (cntr_priv->counter >= threshold)
break;
if (timeout < 0)
continue;
clock_gettime(CLOCK_REALTIME, &ts);
msec_passed = (ts.tv_sec - ts0.tv_sec) * 1000 +
(ts.tv_nsec - ts0.tv_nsec) / 1000000;
@ -325,7 +332,6 @@ static struct fi_ops psmx_fi_ops = {
.size = sizeof(struct fi_ops),
.close = psmx_cntr_close,
.bind = fi_no_bind,
.sync = fi_no_sync,
.control = psmx_cntr_control,
};
@ -365,6 +371,7 @@ int psmx_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
switch (attr->wait_obj) {
case FI_WAIT_NONE:
case FI_WAIT_UNSPEC:
break;
case FI_WAIT_SET:
@ -376,7 +383,6 @@ int psmx_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
wait = (struct psmx_fid_wait *)attr->wait_set;
break;
case FI_WAIT_UNSPEC:
case FI_WAIT_FD:
case FI_WAIT_MUT_COND:
wait_attr.wait_obj = attr->wait_obj;

Просмотреть файл

@ -289,44 +289,32 @@ int psmx_cq_poll_mq(struct psmx_fid_cq *cq, struct psmx_fid_domain *domain,
switch (PSMX_CTXT_TYPE(fi_context)) {
case PSMX_NOCOMP_SEND_CONTEXT:
tmp_ep->pending_sends--;
if (!tmp_ep->send_cntr_event_flag)
tmp_cntr = tmp_ep->send_cntr;
tmp_cntr = tmp_ep->send_cntr;
break;
case PSMX_NOCOMP_RECV_CONTEXT:
if (!tmp_ep->recv_cntr_event_flag)
tmp_cntr = tmp_ep->recv_cntr;
tmp_cntr = tmp_ep->recv_cntr;
break;
case PSMX_NOCOMP_WRITE_CONTEXT:
tmp_ep->pending_writes--;
if (!tmp_ep->write_cntr_event_flag)
tmp_cntr = tmp_ep->write_cntr;
tmp_cntr = tmp_ep->write_cntr;
break;
case PSMX_NOCOMP_READ_CONTEXT:
tmp_ep->pending_reads--;
if (!tmp_ep->read_cntr_event_flag)
tmp_cntr = tmp_ep->read_cntr;
tmp_cntr = tmp_ep->read_cntr;
break;
case PSMX_INJECT_CONTEXT:
tmp_ep->pending_sends--;
if (!tmp_ep->send_cntr_event_flag)
tmp_cntr = tmp_ep->send_cntr;
tmp_cntr = tmp_ep->send_cntr;
free(fi_context);
break;
case PSMX_INJECT_WRITE_CONTEXT:
tmp_ep->pending_writes--;
if (!tmp_ep->write_cntr_event_flag)
tmp_cntr = tmp_ep->write_cntr;
tmp_cntr = tmp_ep->write_cntr;
free(fi_context);
break;
case PSMX_SEND_CONTEXT:
tmp_ep->pending_sends--;
tmp_cq = tmp_ep->send_cq;
tmp_cntr = tmp_ep->send_cntr;
break;
@ -343,13 +331,11 @@ int psmx_cq_poll_mq(struct psmx_fid_cq *cq, struct psmx_fid_domain *domain,
break;
case PSMX_READ_CONTEXT:
tmp_ep->pending_reads--;
tmp_cq = tmp_ep->send_cq;
tmp_cntr = tmp_ep->read_cntr;
break;
case PSMX_WRITE_CONTEXT:
tmp_ep->pending_writes--;
tmp_cq = tmp_ep->send_cq;
tmp_cntr = tmp_ep->write_cntr;
break;
@ -373,7 +359,9 @@ int psmx_cq_poll_mq(struct psmx_fid_cq *cq, struct psmx_fid_domain *domain,
psmx_cq_enqueue_event(mr->cq, event);
}
if (mr->cntr)
mr->cntr->cntr.ops->add(&tmp_cntr->cntr, 1);
psmx_cntr_inc(mr->cntr);
if (mr->domain->rma_ep->remote_write_cntr)
psmx_cntr_inc(mr->domain->rma_ep->remote_write_cntr);
if (!cq || mr->cq == cq)
return 1;
continue;
@ -384,20 +372,9 @@ int psmx_cq_poll_mq(struct psmx_fid_cq *cq, struct psmx_fid_domain *domain,
struct fi_context *fi_context = psm_status.context;
struct psmx_fid_mr *mr;
mr = PSMX_CTXT_USER(fi_context);
if (mr->cq) {
event = psmx_cq_create_event_from_status(
mr->cq, &psm_status, 0,
(mr->cq == cq) ? event_in : NULL,
count, src_addr);
if (!event)
return -ENOMEM;
if (event != event_in)
psmx_cq_enqueue_event(mr->cq, event);
}
if (mr->cntr)
mr->cntr->cntr.ops->add(&tmp_cntr->cntr, 1);
if (!cq || mr->cq == cq)
if (mr->domain->rma_ep->remote_read_cntr)
psmx_cntr_inc(mr->domain->rma_ep->remote_read_cntr);
if (!cq)
return 1;
continue;
}
@ -415,7 +392,7 @@ int psmx_cq_poll_mq(struct psmx_fid_cq *cq, struct psmx_fid_domain *domain,
}
if (tmp_cntr)
tmp_cntr->cntr.ops->add(&tmp_cntr->cntr, 1);
psmx_cntr_inc(tmp_cntr);
if (multi_recv) {
struct psmx_multi_recv *req;
@ -479,7 +456,7 @@ static ssize_t psmx_cq_readfrom(struct fid_cq *cq, void *buf, size_t count,
cq_priv = container_of(cq, struct psmx_fid_cq, cq);
assert(cq_priv->domain);
if (PSMX_CQ_EMPTY(cq_priv)) {
if (PSMX_CQ_EMPTY(cq_priv) || !buf) {
ret = psmx_cq_poll_mq(cq_priv, cq_priv->domain,
(struct psmx_cq_event *)buf, count, src_addr);
if (ret > 0)
@ -534,15 +511,12 @@ static ssize_t psmx_cq_read(struct fid_cq *cq, void *buf, size_t count)
}
static ssize_t psmx_cq_readerr(struct fid_cq *cq, struct fi_cq_err_entry *buf,
size_t len, uint64_t flags)
uint64_t flags)
{
struct psmx_fid_cq *cq_priv;
cq_priv = container_of(cq, struct psmx_fid_cq, cq);
if (len < sizeof *buf)
return -FI_ETOOSMALL;
if (cq_priv->pending_error) {
memcpy(buf, &cq_priv->pending_error->cqe, sizeof *buf);
free(cq_priv->pending_error);
@ -608,7 +582,9 @@ static ssize_t psmx_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count,
int timeout)
{
struct psmx_fid_cq *cq_priv;
size_t threshold;
struct timespec ts0, ts;
size_t threshold, event_count;
int msec_passed = 0;
cq_priv = container_of(cq, struct psmx_fid_cq, cq);
if (cq_priv->wait_cond == FI_CQ_COND_THRESHOLD)
@ -617,12 +593,32 @@ static ssize_t psmx_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count,
threshold = 1;
/* NOTE: "cond" is only a hint, not a mandatory condition. */
if (cq_priv->event_queue.count < threshold) {
if (cq_priv->wait)
event_count = cq_priv->event_queue.count;
if (event_count < threshold) {
if (cq_priv->wait) {
psmx_wait_wait((struct fid_wait *)cq_priv->wait, timeout);
else
while (!psmx_cq_poll_mq(cq_priv, cq_priv->domain, NULL, 0, NULL))
;
}
else {
clock_gettime(CLOCK_REALTIME, &ts0);
while (1) {
if (psmx_cq_poll_mq(cq_priv, cq_priv->domain, NULL, 0, NULL) > 0)
break;
/* CQ may be updated asynchronously by the AM handlers */
if (cq_priv->event_queue.count > event_count)
break;
if (timeout < 0)
continue;
clock_gettime(CLOCK_REALTIME, &ts);
msec_passed = (ts.tv_sec - ts0.tv_sec) * 1000 +
(ts.tv_nsec - ts0.tv_nsec) / 1000000;
if (msec_passed >= timeout)
break;
}
}
}
return psmx_cq_readfrom(cq, buf, count, src_addr);
@ -635,7 +631,7 @@ static ssize_t psmx_cq_sread(struct fid_cq *cq, void *buf, size_t count,
}
static const char *psmx_cq_strerror(struct fid_cq *cq, int prov_errno, const void *prov_data,
void *buf, size_t len)
char *buf, size_t len)
{
return psm_error_get_string(prov_errno);
}
@ -684,7 +680,6 @@ static struct fi_ops psmx_fi_ops = {
.size = sizeof(struct fi_ops),
.close = psmx_cq_close,
.bind = fi_no_bind,
.sync = fi_no_sync,
.control = psmx_cq_control,
};
@ -740,6 +735,7 @@ int psmx_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
switch (attr->wait_obj) {
case FI_WAIT_NONE:
case FI_WAIT_UNSPEC:
break;
case FI_WAIT_SET:
@ -751,7 +747,6 @@ int psmx_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
wait = (struct psmx_fid_wait *)attr->wait_set;
break;
case FI_WAIT_UNSPEC:
case FI_WAIT_FD:
case FI_WAIT_MUT_COND:
wait_attr.wait_obj = attr->wait_obj;

Просмотреть файл

@ -68,7 +68,6 @@ static struct fi_ops psmx_fi_ops = {
.size = sizeof(struct fi_ops),
.close = psmx_domain_close,
.bind = fi_no_bind,
.sync = fi_no_sync,
.control = fi_no_control,
};
@ -139,7 +138,7 @@ int psmx_domain_open(struct fid_fabric *fabric, struct fi_info *info,
if (err)
domain_priv->ns_thread = 0;
if (psmx_domain_enable_features(domain_priv, 0) < 0) {
if (psmx_domain_enable_ep(domain_priv, NULL) < 0) {
if (domain_priv->ns_thread) {
pthread_cancel(domain_priv->ns_thread);
pthread_join(domain_priv->ns_thread, NULL);
@ -168,23 +167,28 @@ int psmx_domain_check_features(struct psmx_fid_domain *domain, int ep_cap)
if ((ep_cap & PSMX_CAPS) != ep_cap)
return -EINVAL;
if ((ep_cap & FI_TAGGED) && domain->tagged_used)
if ((ep_cap & FI_TAGGED) && domain->tagged_ep)
return -EBUSY;
if ((ep_cap & FI_MSG) && domain->msg_used)
if ((ep_cap & FI_MSG) && domain->msg_ep)
return -EBUSY;
if ((ep_cap & FI_RMA) && domain->rma_used)
if ((ep_cap & FI_RMA) && domain->rma_ep)
return -EBUSY;
if ((ep_cap & FI_ATOMICS) && domain->atomics_used)
if ((ep_cap & FI_ATOMICS) && domain->atomics_ep)
return -EBUSY;
return 0;
}
int psmx_domain_enable_features(struct psmx_fid_domain *domain, int ep_cap)
int psmx_domain_enable_ep(struct psmx_fid_domain *domain, struct psmx_fid_ep *ep)
{
uint64_t ep_cap = 0;
if (ep)
ep_cap = ep->caps;
if (ep_cap & FI_MSG)
domain->reserved_tag_bits |= PSMX_MSG_BIT;
@ -204,17 +208,35 @@ int psmx_domain_enable_features(struct psmx_fid_domain *domain, int ep_cap)
}
if (ep_cap & FI_RMA)
domain->rma_used = 1;
domain->rma_ep = ep;
if (ep_cap & FI_ATOMICS)
domain->atomics_used = 1;
domain->atomics_ep = ep;
if (ep_cap & FI_TAGGED)
domain->tagged_used = 1;
domain->tagged_ep = ep;
if (ep_cap & FI_MSG)
domain->msg_used = 1;
domain->msg_ep = ep;
return 0;
}
void psmx_domain_disable_ep(struct psmx_fid_domain *domain, struct psmx_fid_ep *ep)
{
if (!ep)
return;
if ((ep->caps & FI_RMA) && domain->rma_ep == ep)
domain->rma_ep = NULL;
if ((ep->caps & FI_ATOMICS) && domain->atomics_ep == ep)
domain->atomics_ep = NULL;
if ((ep->caps & FI_TAGGED) && domain->tagged_ep == ep)
domain->tagged_ep = NULL;
if ((ep->caps & FI_MSG) && domain->msg_ep == ep)
domain->msg_ep = NULL;
}

Просмотреть файл

@ -146,6 +146,9 @@ static int psmx_ep_close(fid_t fid)
struct psmx_fid_ep *ep;
ep = container_of(fid, struct psmx_fid_ep, ep.fid);
psmx_domain_disable_ep(ep->domain, ep);
free(ep);
return 0;
@ -171,7 +174,7 @@ static int psmx_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
cq = container_of(bfid, struct psmx_fid_cq, cq.fid);
if (ep->domain != cq->domain)
return -EINVAL;
if (flags & (FI_SEND | FI_READ | FI_WRITE)) {
if (flags & FI_SEND) {
ep->send_cq = cq;
if (flags & FI_EVENT)
ep->send_cq_event_flag = 1;
@ -188,26 +191,18 @@ static int psmx_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
cntr = container_of(bfid, struct psmx_fid_cntr, cntr.fid);
if (ep->domain != cntr->domain)
return -EINVAL;
if (flags & FI_SEND) {
if (flags & FI_SEND)
ep->send_cntr = cntr;
if (flags & FI_EVENT)
ep->send_cntr_event_flag = 1;
}
if (flags & FI_RECV){
if (flags & FI_RECV)
ep->recv_cntr = cntr;
if (flags & FI_EVENT)
ep->recv_cntr_event_flag = 1;
}
if (flags & FI_WRITE) {
if (flags & FI_WRITE)
ep->write_cntr = cntr;
if (flags & FI_EVENT)
ep->write_cntr_event_flag = 1;
}
if (flags & FI_READ){
if (flags & FI_READ)
ep->read_cntr = cntr;
if (flags & FI_EVENT)
ep->read_cntr_event_flag = 1;
}
if (flags & FI_REMOTE_WRITE)
ep->remote_write_cntr = cntr;
if (flags & FI_REMOTE_READ)
ep->remote_read_cntr = cntr;
break;
case FI_CLASS_AV:
@ -239,35 +234,6 @@ static inline int psmx_ep_progress(struct psmx_fid_ep *ep)
return psmx_cq_poll_mq(NULL, ep->domain, NULL, 0, NULL);
}
static int psmx_ep_sync(fid_t fid, uint64_t flags, void *context)
{
struct psmx_fid_ep *ep;
ep = container_of(fid, struct psmx_fid_ep, ep.fid);
if (!flags || (flags & FI_SEND)) {
while (ep->pending_sends)
psmx_ep_progress(ep);
}
if (!flags || (flags & FI_WRITE)) {
while (ep->pending_writes)
psmx_ep_progress(ep);
}
if (!flags || (flags & FI_READ)) {
while (ep->pending_reads)
psmx_ep_progress(ep);
}
if (!flags || (flags & FI_WRITE) || (flags & FI_WRITE)) {
while (ep->pending_atomics)
psmx_ep_progress(ep);
}
return 0;
}
static int psmx_ep_control(fid_t fid, int command, void *arg)
{
struct fi_alias *alias;
@ -324,7 +290,6 @@ static struct fi_ops psmx_fi_ops = {
.size = sizeof(struct fi_ops),
.close = psmx_ep_close,
.bind = psmx_ep_bind,
.sync = psmx_ep_sync,
.control = psmx_ep_control,
};
@ -390,7 +355,9 @@ int psmx_ep_open(struct fid_domain *domain, struct fi_info *info,
if (ep_cap & FI_ATOMICS)
ep_priv->ep.atomic = &psmx_atomic_ops;
err = psmx_domain_enable_features(domain_priv, info->caps);
ep_priv->caps = ep_cap;
err = psmx_domain_enable_ep(domain_priv, ep_priv);
if (err) {
free(ep_priv);
return err;

Просмотреть файл

@ -198,7 +198,6 @@ static struct fi_ops psmx_fi_ops = {
.size = sizeof(struct fi_ops),
.close = psmx_mr_close,
.bind = psmx_mr_bind,
.sync = fi_no_sync,
.control = fi_no_control,
};
@ -271,7 +270,7 @@ static int psmx_mr_reg(struct fid_domain *domain, const void *buf, size_t len,
uint64_t key;
domain_priv = container_of(domain, struct psmx_fid_domain, domain);
if (!(domain_priv->mode & FI_PROV_MR_KEY) && psmx_mr_hash_get(requested_key))
if (!(domain_priv->mode & FI_PROV_MR_ATTR) && psmx_mr_hash_get(requested_key))
return -FI_ENOKEY;
mr_priv = (struct psmx_fid_mr *) calloc(1, sizeof(*mr_priv) + sizeof(struct iovec));
@ -282,7 +281,7 @@ static int psmx_mr_reg(struct fid_domain *domain, const void *buf, size_t len,
mr_priv->mr.fid.context = context;
mr_priv->mr.fid.ops = &psmx_fi_ops;
mr_priv->mr.mem_desc = mr_priv;
if (!(domain_priv->mode & FI_PROV_MR_KEY)) {
if (!(domain_priv->mode & FI_PROV_MR_ATTR)) {
key = requested_key;
}
else {
@ -317,7 +316,7 @@ static int psmx_mr_regv(struct fid_domain *domain,
uint64_t key;
domain_priv = container_of(domain, struct psmx_fid_domain, domain);
if (!(domain_priv->mode & FI_PROV_MR_KEY) && psmx_mr_hash_get(requested_key))
if (!(domain_priv->mode & FI_PROV_MR_ATTR) && psmx_mr_hash_get(requested_key))
return -FI_ENOKEY;
if (count == 0 || iov == NULL)
@ -333,7 +332,7 @@ static int psmx_mr_regv(struct fid_domain *domain,
mr_priv->mr.fid.context = context;
mr_priv->mr.fid.ops = &psmx_fi_ops;
mr_priv->mr.mem_desc = mr_priv;
if (!(domain_priv->mode & FI_PROV_MR_KEY)) {
if (!(domain_priv->mode & FI_PROV_MR_ATTR)) {
key = requested_key;
}
else {
@ -367,7 +366,7 @@ static int psmx_mr_regattr(struct fid_domain *domain, const struct fi_mr_attr *a
uint64_t key;
domain_priv = container_of(domain, struct psmx_fid_domain, domain);
if (!(domain_priv->mode & FI_PROV_MR_KEY) && psmx_mr_hash_get(attr->requested_key))
if (!(domain_priv->mode & FI_PROV_MR_ATTR) && psmx_mr_hash_get(attr->requested_key))
return -FI_ENOKEY;
if (!attr)
@ -385,7 +384,7 @@ static int psmx_mr_regattr(struct fid_domain *domain, const struct fi_mr_attr *a
mr_priv->mr.fid.fclass = FI_CLASS_MR;
mr_priv->mr.fid.ops = &psmx_fi_ops;
mr_priv->mr.mem_desc = mr_priv;
if (!(domain_priv->mode & FI_PROV_MR_KEY)) {
if (!(domain_priv->mode & FI_PROV_MR_ATTR)) {
key = attr->requested_key;
}
else {

Просмотреть файл

@ -32,9 +32,9 @@
#include "psmx.h"
ssize_t _psmx_recvfrom(struct fid_ep *ep, void *buf, size_t len,
void *desc, fi_addr_t src_addr, void *context,
uint64_t flags)
ssize_t _psmx_recv(struct fid_ep *ep, void *buf, size_t len,
void *desc, fi_addr_t src_addr, void *context,
uint64_t flags)
{
struct psmx_fid_ep *ep_priv;
struct psmx_fid_av *av;
@ -47,6 +47,10 @@ ssize_t _psmx_recvfrom(struct fid_ep *ep, void *buf, size_t len,
int recv_flag = 0;
size_t idx;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
if (ep_priv->connected)
src_addr = (fi_addr_t) ep_priv->peer_psm_epaddr;
if (flags & FI_TRIGGER) {
struct psmx_trigger *trigger;
struct fi_triggered_context *ctxt = context;
@ -71,8 +75,6 @@ ssize_t _psmx_recvfrom(struct fid_ep *ep, void *buf, size_t len,
return 0;
}
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
if (src_addr) {
av = ep_priv->av;
if (av && av->type == FI_AV_TABLE) {
@ -137,14 +139,14 @@ ssize_t _psmx_recvfrom(struct fid_ep *ep, void *buf, size_t len,
return 0;
}
static ssize_t psmx_recvfrom(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr, void *context)
static ssize_t psmx_recv(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr, void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
return _psmx_recvfrom(ep, buf, len, desc, src_addr, context, ep_priv->flags);
return _psmx_recv(ep, buf, len, desc, src_addr, context, ep_priv->flags);
}
static ssize_t psmx_recvmsg(struct fid_ep *ep, const struct fi_msg *msg, uint64_t flags)
@ -164,27 +166,13 @@ static ssize_t psmx_recvmsg(struct fid_ep *ep, const struct fi_msg *msg, uint64_
len = 0;
}
return _psmx_recvfrom(ep, buf, len,
msg->desc ? msg->desc[0] : NULL, msg->addr,
msg->context, flags);
}
static ssize_t psmx_recv(struct fid_ep *ep, void *buf, size_t len, void *desc,
void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
if (ep_priv->connected)
return psmx_recvfrom(ep, buf, len, desc,
(fi_addr_t) ep_priv->peer_psm_epaddr, context);
else
return psmx_recvfrom(ep, buf, len, desc, 0, context);
return _psmx_recv(ep, buf, len,
msg->desc ? msg->desc[0] : NULL, msg->addr,
msg->context, flags);
}
static ssize_t psmx_recvv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, void *context)
size_t count, fi_addr_t src_addr, void *context)
{
void *buf;
size_t len;
@ -201,12 +189,12 @@ static ssize_t psmx_recvv(struct fid_ep *ep, const struct iovec *iov, void **des
len = 0;
}
return psmx_recv(ep, buf, len, desc ? desc[0] : NULL, context);
return psmx_recv(ep, buf, len, desc ? desc[0] : NULL, src_addr, context);
}
ssize_t _psmx_sendto(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, void *context,
uint64_t flags)
ssize_t _psmx_send(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, void *context,
uint64_t flags)
{
struct psmx_fid_ep *ep_priv;
struct psmx_fid_av *av;
@ -219,6 +207,11 @@ ssize_t _psmx_sendto(struct fid_ep *ep, const void *buf, size_t len,
int err;
size_t idx;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (ep_priv->connected)
dest_addr = (fi_addr_t) ep_priv->peer_psm_epaddr;
if (flags & FI_TRIGGER) {
struct psmx_trigger *trigger;
struct fi_triggered_context *ctxt = context;
@ -243,9 +236,6 @@ ssize_t _psmx_sendto(struct fid_ep *ep, const void *buf, size_t len,
return 0;
}
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
av = ep_priv->av;
if (av && av->type == FI_AV_TABLE) {
idx = (size_t)dest_addr;
@ -293,22 +283,20 @@ ssize_t _psmx_sendto(struct fid_ep *ep, const void *buf, size_t len,
if (err != PSM_OK)
return psmx_errno(err);
ep_priv->pending_sends++;
if (user_fi_context)
PSMX_CTXT_REQ(fi_context) = psm_req;
return 0;
}
static ssize_t psmx_sendto(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, void *context)
static ssize_t psmx_send(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
return _psmx_sendto(ep, buf, len, desc, dest_addr, context, ep_priv->flags);
return _psmx_send(ep, buf, len, desc, dest_addr, context, ep_priv->flags);
}
static ssize_t psmx_sendmsg(struct fid_ep *ep, const struct fi_msg *msg, uint64_t flags)
@ -328,26 +316,13 @@ static ssize_t psmx_sendmsg(struct fid_ep *ep, const struct fi_msg *msg, uint64_
len = 0;
}
return _psmx_sendto(ep, buf, len,
msg->desc ? msg->desc[0] : NULL, msg->addr,
msg->context, flags);
}
static ssize_t psmx_send(struct fid_ep *ep, const void *buf, size_t len, void *desc,
void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
if (!ep_priv->connected)
return -ENOTCONN;
return psmx_sendto(ep, buf, len, desc, (fi_addr_t) ep_priv->peer_psm_epaddr, context);
return _psmx_send(ep, buf, len,
msg->desc ? msg->desc[0] : NULL, msg->addr,
msg->context, flags);
}
static ssize_t psmx_sendv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, void *context)
size_t count, fi_addr_t dest_addr, void *context)
{
void *buf;
size_t len;
@ -364,45 +339,29 @@ static ssize_t psmx_sendv(struct fid_ep *ep, const struct iovec *iov, void **des
len = 0;
}
return psmx_send(ep, buf, len, desc ? desc[0] : NULL, context);
return psmx_send(ep, buf, len, desc ? desc[0] : NULL, dest_addr, context);
}
static ssize_t psmx_injectto(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr)
static ssize_t psmx_inject(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
return _psmx_sendto(ep, buf, len, NULL, dest_addr, NULL,
ep_priv->flags | FI_INJECT);
}
static ssize_t psmx_inject(struct fid_ep *ep, const void *buf, size_t len)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
if (!ep_priv->connected)
return -ENOTCONN;
return psmx_injectto(ep, buf, len, (fi_addr_t) ep_priv->peer_psm_epaddr);
return _psmx_send(ep, buf, len, NULL, dest_addr, NULL,
ep_priv->flags | FI_INJECT);
}
struct fi_ops_msg psmx_msg_ops = {
.size = sizeof(struct fi_ops_msg),
.recv = psmx_recv,
.recvv = psmx_recvv,
.recvfrom = psmx_recvfrom,
.recvmsg = psmx_recvmsg,
.send = psmx_send,
.sendv = psmx_sendv,
.sendto = psmx_sendto,
.sendmsg = psmx_sendmsg,
.inject = psmx_inject,
.injectto = psmx_injectto,
.senddata = fi_no_msg_senddata,
.senddatato = fi_no_msg_senddatato,
};

Просмотреть файл

@ -209,8 +209,7 @@ int psmx_am_msg_handler(psm_am_token_t token, psm_epaddr_t epaddr,
err = -ENOMEM;
}
if (req->ep->recv_cntr &&
!(req->ep->recv_cntr_event_flag && req->no_event))
if (req->ep->recv_cntr)
psmx_cntr_inc(req->ep->recv_cntr);
free(req);
@ -264,12 +263,9 @@ int psmx_am_msg_handler(psm_am_token_t token, psm_epaddr_t epaddr,
err = -ENOMEM;
}
if (req->ep->send_cntr &&
!(req->ep->send_cntr_event_flag && req->no_event))
if (req->ep->send_cntr)
psmx_cntr_inc(req->ep->send_cntr);
req->ep->pending_sends--;
if (req->state == PSMX_AM_STATE_QUEUED)
req->state = PSMX_AM_STATE_DONE;
else
@ -333,9 +329,9 @@ int psmx_am_process_send(struct psmx_fid_domain *domain, struct psmx_am_request
return psmx_errno(err);
}
static ssize_t _psmx_recvfrom2(struct fid_ep *ep, void *buf, size_t len,
void *desc, fi_addr_t src_addr,
void *context, uint64_t flags)
static ssize_t _psmx_recv2(struct fid_ep *ep, void *buf, size_t len,
void *desc, fi_addr_t src_addr,
void *context, uint64_t flags)
{
psm_amarg_t args[8];
struct psmx_fid_ep *ep_priv;
@ -348,6 +344,8 @@ static ssize_t _psmx_recvfrom2(struct fid_ep *ep, void *buf, size_t len,
size_t idx;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
if (ep_priv->connected)
src_addr = (fi_addr_t) ep_priv->peer_psm_epaddr;
if (src_addr) {
av = ep_priv->av;
@ -419,8 +417,7 @@ static ssize_t _psmx_recvfrom2(struct fid_ep *ep, void *buf, size_t len,
err = -ENOMEM;
}
if (req->ep->recv_cntr &&
!(req->ep->recv_cntr_event_flag && req->no_event))
if (req->ep->recv_cntr)
psmx_cntr_inc(req->ep->recv_cntr);
free(req);
@ -429,14 +426,13 @@ static ssize_t _psmx_recvfrom2(struct fid_ep *ep, void *buf, size_t len,
return err;
}
static ssize_t psmx_recvfrom2(struct fid_ep *ep, void *buf, size_t len,
void *desc, fi_addr_t src_addr,
void *context)
static ssize_t psmx_recv2(struct fid_ep *ep, void *buf, size_t len,
void *desc, fi_addr_t src_addr, void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
return _psmx_recvfrom2(ep, buf, len, desc, src_addr, context, ep_priv->flags);
return _psmx_recv2(ep, buf, len, desc, src_addr, context, ep_priv->flags);
}
static ssize_t psmx_recvmsg2(struct fid_ep *ep, const struct fi_msg *msg,
@ -457,27 +453,13 @@ static ssize_t psmx_recvmsg2(struct fid_ep *ep, const struct fi_msg *msg,
len = 0;
}
return _psmx_recvfrom2(ep, buf, len,
msg->desc, msg->addr, msg->context, flags);
}
static ssize_t psmx_recv2(struct fid_ep *ep, void *buf, size_t len,
void *desc, void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (ep_priv->connected)
return psmx_recvfrom2(ep, buf, len, desc,
(fi_addr_t) ep_priv->peer_psm_epaddr, context);
else
return psmx_recvfrom2(ep, buf, len, desc, 0, context);
return _psmx_recv2(ep, buf, len,
msg->desc, msg->addr, msg->context, flags);
}
static ssize_t psmx_recvv2(struct fid_ep *ep, const struct iovec *iov,
void **desc, size_t count, void *context)
void **desc, size_t count, fi_addr_t src_addr,
void *context)
{
void *buf;
size_t len;
@ -494,12 +476,12 @@ static ssize_t psmx_recvv2(struct fid_ep *ep, const struct iovec *iov,
len = 0;
}
return psmx_recv2(ep, buf, len, desc ? desc[0] : NULL, context);
return psmx_recv2(ep, buf, len, desc ? desc[0] : NULL, src_addr, context);
}
static ssize_t _psmx_sendto2(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr,
void *context, uint64_t flags)
static ssize_t _psmx_send2(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr,
void *context, uint64_t flags)
{
struct psmx_fid_ep *ep_priv;
struct psmx_fid_av *av;
@ -512,6 +494,8 @@ static ssize_t _psmx_sendto2(struct fid_ep *ep, const void *buf, size_t len,
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (ep_priv->connected)
dest_addr = (fi_addr_t) ep_priv->peer_psm_epaddr;
if (!buf)
return -EINVAL;
@ -557,8 +541,6 @@ static ssize_t _psmx_sendto2(struct fid_ep *ep, const void *buf, size_t len,
PSMX_AM_MSG_HANDLER, args, 4,
(void *)buf, msg_size, am_flags, NULL, NULL);
ep_priv->pending_sends++;
#if ! PSMX_AM_USE_SEND_QUEUE
if (len > msg_size) {
while (!req->send.peer_ready)
@ -572,14 +554,14 @@ static ssize_t _psmx_sendto2(struct fid_ep *ep, const void *buf, size_t len,
}
static ssize_t psmx_sendto2(struct fid_ep *ep, const void *buf,
size_t len, void *desc,
fi_addr_t dest_addr, void *context)
static ssize_t psmx_send2(struct fid_ep *ep, const void *buf,
size_t len, void *desc,
fi_addr_t dest_addr, void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
return _psmx_sendto2(ep, buf, len, desc, dest_addr, context, ep_priv->flags);
return _psmx_send2(ep, buf, len, desc, dest_addr, context, ep_priv->flags);
}
static ssize_t psmx_sendmsg2(struct fid_ep *ep, const struct fi_msg *msg,
@ -600,26 +582,13 @@ static ssize_t psmx_sendmsg2(struct fid_ep *ep, const struct fi_msg *msg,
len = 0;
}
return _psmx_sendto2(ep, buf, len,
msg->desc, msg->addr, msg->context, flags);
}
static ssize_t psmx_send2(struct fid_ep *ep, const void *buf, size_t len,
void *desc, void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (!ep_priv->connected)
return -ENOTCONN;
return psmx_sendto2(ep, buf, len, desc, (fi_addr_t) ep_priv->peer_psm_epaddr, context);
return _psmx_send2(ep, buf, len,
msg->desc, msg->addr, msg->context, flags);
}
static ssize_t psmx_sendv2(struct fid_ep *ep, const struct iovec *iov,
void **desc, size_t count, void *context)
void **desc, size_t count, fi_addr_t dest_addr,
void *context)
{
void *buf;
size_t len;
@ -636,46 +605,30 @@ static ssize_t psmx_sendv2(struct fid_ep *ep, const struct iovec *iov,
len = 0;
}
return psmx_send2(ep, buf, len, desc ? desc[0] : NULL, context);
return psmx_send2(ep, buf, len, desc ? desc[0] : NULL, dest_addr, context);
}
static ssize_t psmx_injectto2(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr)
static ssize_t psmx_inject2(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
/* TODO: optimize it & guarantee buffered */
return _psmx_sendto2(ep, buf, len, NULL, dest_addr, &ep_priv->sendimm_context,
ep_priv->flags | FI_INJECT);
}
static ssize_t psmx_inject2(struct fid_ep *ep, const void *buf, size_t len)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
if (!ep_priv->connected)
return -ENOTCONN;
return psmx_injectto2(ep, buf, len, (fi_addr_t) ep_priv->peer_psm_epaddr);
return _psmx_send2(ep, buf, len, NULL, dest_addr, &ep_priv->sendimm_context,
ep_priv->flags | FI_INJECT);
}
struct fi_ops_msg psmx_msg2_ops = {
.size = sizeof(struct fi_ops_msg),
.recv = psmx_recv2,
.recvv = psmx_recvv2,
.recvfrom = psmx_recvfrom2,
.recvmsg = psmx_recvmsg2,
.send = psmx_send2,
.sendv = psmx_sendv2,
.sendto = psmx_sendto2,
.sendmsg = psmx_sendmsg2,
.inject = psmx_inject2,
.injectto = psmx_injectto2,
.senddata = fi_no_msg_senddata,
.senddatato = fi_no_msg_senddatato,
};

Просмотреть файл

@ -74,13 +74,42 @@ int psmx_poll_del(struct fid_poll *pollset, struct fid *event_fid, uint64_t flag
static int psmx_poll_poll(struct fid_poll *pollset, void **context, int count)
{
struct psmx_fid_poll *poll_priv;
int err = 0;
struct psmx_fid_cq *cq;
struct psmx_fid_cntr *cntr;
struct psmx_poll_list *list_item;
struct dlist_entry *p, *head;
int ret_count = 0;
poll_priv = container_of(pollset, struct psmx_fid_poll, poll.fid);
/* TODO: poll them all! */
psmx_cq_poll_mq(NULL, poll_priv->domain, NULL, 0, NULL);
return err;
head = &poll_priv->poll_list_head;
for (p = head->next; p != head && ret_count < count; p = p->next) {
list_item = container_of(p, struct psmx_poll_list, entry);
switch (list_item->fid->fclass) {
case FI_CLASS_CQ:
cq = container_of(list_item->fid, struct psmx_fid_cq, cq);
if (cq->event_queue.count) {
*context++ = cq->cq.fid.context;
ret_count++;
}
break;
case FI_CLASS_CNTR:
cntr = container_of(list_item->fid, struct psmx_fid_cntr, cntr);
if (cntr->counter != cntr->counter_last_read) {
*context++ = cntr->cntr.fid.context;
ret_count++;
}
break;
default:
break;
}
}
return ret_count;
}
static int psmx_poll_close(fid_t fid)
@ -107,7 +136,6 @@ static struct fi_ops psmx_fi_ops = {
.size = sizeof(struct fi_ops),
.close = psmx_poll_close,
.bind = fi_no_bind,
.sync = fi_no_sync,
.control = fi_no_control,
.ops_open = fi_no_ops_open,
};
@ -134,6 +162,7 @@ int psmx_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr,
poll_priv->poll.fid.context = 0;
poll_priv->poll.fid.ops = &psmx_fi_ops;
poll_priv->poll.ops = &psmx_poll_ops;
poll_priv->domain = domain_priv;
*pollset = &poll_priv->poll;
return 0;

Просмотреть файл

@ -116,6 +116,8 @@ int psmx_am_rma_handler(psm_am_token_t token, psm_epaddr_t epaddr,
}
if (mr->cntr)
psmx_cntr_inc(mr->cntr);
if (mr->domain->rma_ep->remote_write_cntr)
psmx_cntr_inc(mr->domain->rma_ep->remote_write_cntr);
}
}
if (eom || op_error) {
@ -193,26 +195,8 @@ int psmx_am_rma_handler(psm_am_token_t token, psm_epaddr_t epaddr,
NULL, NULL );
if (eom && !op_error) {
if (mr->cq) {
/* TODO: report the addr/len of the whole read */
event = psmx_cq_create_event(
mr->cq,
0, /* context */
rma_addr,
0, /* flags */
rma_len,
0, /* data */
0, /* tag */
0, /* olen */
0);
if (event)
psmx_cq_enqueue_event(mr->cq, event);
else
err = -ENOMEM;
}
if (mr->cntr)
psmx_cntr_inc(mr->cntr);
if (mr->domain->rma_ep->remote_read_cntr)
psmx_cntr_inc(mr->domain->rma_ep->remote_read_cntr);
}
break;
@ -278,11 +262,9 @@ int psmx_am_rma_handler(psm_am_token_t token, psm_epaddr_t epaddr,
err = -ENOMEM;
}
if (req->ep->write_cntr &&
!(req->ep->write_cntr_event_flag && req->no_event))
if (req->ep->write_cntr)
psmx_cntr_inc(req->ep->write_cntr);
req->ep->pending_writes--;
free(req);
}
break;
@ -316,11 +298,9 @@ int psmx_am_rma_handler(psm_am_token_t token, psm_epaddr_t epaddr,
err = -ENOMEM;
}
if (req->ep->read_cntr &&
!(req->ep->read_cntr_event_flag && req->no_event))
if (req->ep->read_cntr)
psmx_cntr_inc(req->ep->read_cntr);
req->ep->pending_reads--;
free(req);
}
break;
@ -339,6 +319,7 @@ static ssize_t psmx_rma_self(int am_cmd,
{
struct psmx_fid_mr *mr;
struct psmx_cq_event *event;
struct psmx_fid_cntr *cntr;
int no_event;
int err = 0;
int op_error = 0;
@ -347,11 +328,9 @@ static ssize_t psmx_rma_self(int am_cmd,
switch (am_cmd) {
case PSMX_AM_REQ_WRITE:
ep->pending_writes++;
access = FI_REMOTE_WRITE;
break;
case PSMX_AM_REQ_READ:
ep->pending_reads++;
access = FI_REMOTE_READ;
break;
default:
@ -366,15 +345,17 @@ static ssize_t psmx_rma_self(int am_cmd,
if (am_cmd == PSMX_AM_REQ_WRITE) {
dst = (void *)addr;
src = buf;
cntr = mr->domain->rma_ep->remote_write_cntr;
}
else {
dst = buf;
src = (void *)addr;
cntr = mr->domain->rma_ep->remote_read_cntr;
}
memcpy(dst, src, len);
if (mr->cq) {
if (mr->cq && am_cmd == PSMX_AM_REQ_WRITE) {
event = psmx_cq_create_event(
mr->cq,
0, /* context */
@ -391,8 +372,10 @@ static ssize_t psmx_rma_self(int am_cmd,
else
err = -ENOMEM;
}
if (mr->cntr)
if (mr->cntr && am_cmd == PSMX_AM_REQ_WRITE)
psmx_cntr_inc(mr->cntr);
if (cntr)
psmx_cntr_inc(cntr);
}
no_event = (flags & FI_INJECT) ||
@ -417,17 +400,13 @@ static ssize_t psmx_rma_self(int am_cmd,
switch (am_cmd) {
case PSMX_AM_REQ_WRITE:
if (ep->write_cntr &&
!(ep->write_cntr_event_flag && no_event))
if (ep->write_cntr)
psmx_cntr_inc(ep->write_cntr);
ep->pending_writes--;
break;
case PSMX_AM_REQ_READ:
if (ep->read_cntr &&
!(ep->read_cntr_event_flag && no_event))
if (ep->read_cntr)
psmx_cntr_inc(ep->read_cntr);
ep->pending_reads--;
break;
}
@ -454,10 +433,9 @@ int psmx_am_process_rma(struct psmx_fid_domain *domain, struct psmx_am_request *
return psmx_errno(err);
}
ssize_t _psmx_readfrom(struct fid_ep *ep, void *buf, size_t len,
void *desc, fi_addr_t src_addr,
uint64_t addr, uint64_t key, void *context,
uint64_t flags)
ssize_t _psmx_read(struct fid_ep *ep, void *buf, size_t len,
void *desc, fi_addr_t src_addr,
uint64_t addr, uint64_t key, void *context, uint64_t flags)
{
struct psmx_fid_ep *ep_priv;
struct psmx_fid_av *av;
@ -471,6 +449,11 @@ ssize_t _psmx_readfrom(struct fid_ep *ep, void *buf, size_t len,
psm_mq_req_t psm_req;
size_t idx;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (ep_priv->connected)
src_addr = (fi_addr_t) ep_priv->peer_psm_epaddr;
if (flags & FI_TRIGGER) {
struct psmx_trigger *trigger;
struct fi_triggered_context *ctxt = context;
@ -497,9 +480,6 @@ ssize_t _psmx_readfrom(struct fid_ep *ep, void *buf, size_t len,
return 0;
}
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (!buf)
return -EINVAL;
@ -557,7 +537,6 @@ ssize_t _psmx_readfrom(struct fid_ep *ep, void *buf, size_t len,
PSMX_AM_RMA_HANDLER, args, 5, NULL, 0,
PSM_AM_FLAG_NOREPLY, NULL, NULL);
ep_priv->pending_reads++;
return 0;
}
@ -584,11 +563,10 @@ ssize_t _psmx_readfrom(struct fid_ep *ep, void *buf, size_t len,
PSMX_AM_RMA_HANDLER, args, 5, NULL, 0,
0, NULL, NULL);
ep_priv->pending_reads++;
return 0;
}
static ssize_t psmx_readfrom(struct fid_ep *ep, void *buf, size_t len,
static ssize_t psmx_read(struct fid_ep *ep, void *buf, size_t len,
void *desc, fi_addr_t src_addr,
uint64_t addr, uint64_t key, void *context)
{
@ -596,8 +574,8 @@ static ssize_t psmx_readfrom(struct fid_ep *ep, void *buf, size_t len,
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
return _psmx_readfrom(ep, buf, len, desc, src_addr, addr,
key, context, ep_priv->flags);
return _psmx_read(ep, buf, len, desc, src_addr, addr,
key, context, ep_priv->flags);
}
static ssize_t psmx_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg,
@ -606,41 +584,25 @@ static ssize_t psmx_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg,
if (!msg || msg->iov_count != 1)
return -EINVAL;
return _psmx_readfrom(ep, msg->msg_iov[0].iov_base,
msg->msg_iov[0].iov_len,
msg->desc ? msg->desc[0] : NULL, msg->addr,
msg->rma_iov[0].addr, msg->rma_iov[0].key,
msg->context, flags);
}
static ssize_t psmx_read(struct fid_ep *ep, void *buf, size_t len,
void *desc, uint64_t addr, uint64_t key,
void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (!ep_priv->connected)
return -ENOTCONN;
return psmx_readfrom(ep, buf, len, desc, (fi_addr_t) ep_priv->peer_psm_epaddr,
addr, key, context);
return _psmx_read(ep, msg->msg_iov[0].iov_base,
msg->msg_iov[0].iov_len,
msg->desc ? msg->desc[0] : NULL, msg->addr,
msg->rma_iov[0].addr, msg->rma_iov[0].key,
msg->context, flags);
}
static ssize_t psmx_readv(struct fid_ep *ep, const struct iovec *iov,
void **desc, size_t count, uint64_t addr,
uint64_t key, void *context)
void **desc, size_t count, fi_addr_t src_addr,
uint64_t addr, uint64_t key, void *context)
{
if (!iov || count != 1)
return -EINVAL;
return psmx_read(ep, iov->iov_base, iov->iov_len,
desc ? desc[0] : NULL, addr, key, context);
desc ? desc[0] : NULL, src_addr, addr, key, context);
}
ssize_t _psmx_writeto(struct fid_ep *ep, const void *buf, size_t len,
ssize_t _psmx_write(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr,
uint64_t addr, uint64_t key, void *context,
uint64_t flags, uint64_t data)
@ -658,6 +620,11 @@ ssize_t _psmx_writeto(struct fid_ep *ep, const void *buf, size_t len,
uint64_t psm_tag;
size_t idx;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (ep_priv->connected)
dest_addr = (fi_addr_t) ep_priv->peer_psm_epaddr;
if (flags & FI_TRIGGER) {
struct psmx_trigger *trigger;
struct fi_triggered_context *ctxt = context;
@ -685,9 +652,6 @@ ssize_t _psmx_writeto(struct fid_ep *ep, const void *buf, size_t len,
return 0;
}
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (!buf)
return -EINVAL;
@ -768,7 +732,6 @@ ssize_t _psmx_writeto(struct fid_ep *ep, const void *buf, size_t len,
psm_mq_isend(ep_priv->domain->psm_mq, (psm_epaddr_t) dest_addr,
0, psm_tag, buf, len, (void *)&req->fi_context, &psm_req);
ep_priv->pending_writes++;
return 0;
}
@ -802,11 +765,10 @@ ssize_t _psmx_writeto(struct fid_ep *ep, const void *buf, size_t len,
PSMX_AM_RMA_HANDLER, args, nargs,
(void *)buf, len, am_flags, NULL, NULL);
ep_priv->pending_writes++;
return 0;
}
static ssize_t psmx_writeto(struct fid_ep *ep, const void *buf, size_t len,
static ssize_t psmx_write(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, uint64_t addr,
uint64_t key, void *context)
{
@ -814,8 +776,8 @@ static ssize_t psmx_writeto(struct fid_ep *ep, const void *buf, size_t len,
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
return _psmx_writeto(ep, buf, len, desc, dest_addr, addr, key, context,
ep_priv->flags, 0);
return _psmx_write(ep, buf, len, desc, dest_addr, addr, key, context,
ep_priv->flags, 0);
}
static ssize_t psmx_writemsg(struct fid_ep *ep, const struct fi_msg_rma *msg,
@ -824,104 +786,55 @@ static ssize_t psmx_writemsg(struct fid_ep *ep, const struct fi_msg_rma *msg,
if (!msg || msg->iov_count != 1)
return -EINVAL;
return _psmx_writeto(ep, msg->msg_iov[0].iov_base,
msg->msg_iov[0].iov_len,
msg->desc ? msg->desc[0] : NULL, msg->addr,
msg->rma_iov[0].addr, msg->rma_iov[0].key,
msg->context, flags, msg->data);
}
static ssize_t psmx_write(struct fid_ep *ep, const void *buf, size_t len,
void *desc, uint64_t addr, uint64_t key,
void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (!ep_priv->connected)
return -ENOTCONN;
return psmx_writeto(ep, buf, len, desc, (fi_addr_t) ep_priv->peer_psm_epaddr,
addr, key, context);
return _psmx_write(ep, msg->msg_iov[0].iov_base,
msg->msg_iov[0].iov_len,
msg->desc ? msg->desc[0] : NULL, msg->addr,
msg->rma_iov[0].addr, msg->rma_iov[0].key,
msg->context, flags, msg->data);
}
static ssize_t psmx_writev(struct fid_ep *ep, const struct iovec *iov,
void **desc, size_t count, uint64_t addr,
uint64_t key, void *context)
void **desc, size_t count, fi_addr_t dest_addr,
uint64_t addr, uint64_t key, void *context)
{
if (!iov || count != 1)
return -EINVAL;
return psmx_write(ep, iov->iov_base, iov->iov_len,
desc ? desc[0] : NULL, addr, key, context);
desc ? desc[0] : NULL, dest_addr, addr, key, context);
}
static ssize_t psmx_injectto(struct fid_ep *ep, const void *buf, size_t len,
static ssize_t psmx_inject(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr, uint64_t addr, uint64_t key)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
return _psmx_writeto(ep, buf, len, NULL, dest_addr, addr, key,
NULL, ep_priv->flags | FI_INJECT, 0);
}
static ssize_t psmx_inject(struct fid_ep *ep, const void *buf, size_t len,
uint64_t addr, uint64_t key)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (!ep_priv->connected)
return -ENOTCONN;
return psmx_injectto(ep, buf, len, (fi_addr_t) ep_priv->peer_psm_epaddr, addr, key);
}
static ssize_t psmx_writedatato(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, fi_addr_t dest_addr, uint64_t addr,
uint64_t key, void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
return _psmx_writeto(ep, buf, len, desc, dest_addr, addr, key, context,
ep_priv->flags | FI_REMOTE_CQ_DATA, data);
return _psmx_write(ep, buf, len, NULL, dest_addr, addr, key,
NULL, ep_priv->flags | FI_INJECT, 0);
}
static ssize_t psmx_writedata(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, uint64_t addr, uint64_t key, void *context)
uint64_t data, fi_addr_t dest_addr, uint64_t addr,
uint64_t key, void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (!ep_priv->connected)
return -ENOTCONN;
return psmx_writedatato(ep, buf, len, desc, data, (fi_addr_t) ep_priv->peer_psm_epaddr,
addr, key, context);
return _psmx_write(ep, buf, len, desc, dest_addr, addr, key, context,
ep_priv->flags | FI_REMOTE_CQ_DATA, data);
}
struct fi_ops_rma psmx_rma_ops = {
.read = psmx_read,
.readv = psmx_readv,
.readfrom = psmx_readfrom,
.readmsg = psmx_readmsg,
.write = psmx_write,
.writev = psmx_writev,
.writeto = psmx_writeto,
.writemsg = psmx_writemsg,
.inject = psmx_inject,
.injectto = psmx_injectto,
.writedata = psmx_writedata,
.writedatato = psmx_writedatato,
};

Просмотреть файл

@ -32,10 +32,10 @@
#include "psmx.h"
ssize_t _psmx_tagged_recvfrom(struct fid_ep *ep, void *buf, size_t len,
void *desc, fi_addr_t src_addr,
uint64_t tag, uint64_t ignore,
void *context, uint64_t flags)
ssize_t _psmx_tagged_recv(struct fid_ep *ep, void *buf, size_t len,
void *desc, fi_addr_t src_addr,
uint64_t tag, uint64_t ignore,
void *context, uint64_t flags)
{
struct psmx_fid_ep *ep_priv;
psm_mq_req_t psm_req;
@ -44,6 +44,10 @@ ssize_t _psmx_tagged_recvfrom(struct fid_ep *ep, void *buf, size_t len,
int user_fi_context = 0;
int err;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
if (ep_priv->connected)
src_addr = (fi_addr_t) ep_priv->peer_psm_epaddr;
if (src_addr)
psmx_debug("%s: warning: src_addr is currently ignored.", __func__);
@ -73,8 +77,6 @@ ssize_t _psmx_tagged_recvfrom(struct fid_ep *ep, void *buf, size_t len,
return 0;
}
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
if (tag & ep_priv->domain->reserved_tag_bits) {
fprintf(stderr, "%s: warning: using reserved tag bits."
"tag=%lx. reserved_bits=%lx.\n", __func__, tag,
@ -110,11 +112,11 @@ ssize_t _psmx_tagged_recvfrom(struct fid_ep *ep, void *buf, size_t len,
return 0;
}
ssize_t psmx_tagged_recvfrom_no_flag_av_map(struct fid_ep *ep, void *buf,
size_t len, void *desc,
fi_addr_t src_addr,
uint64_t tag, uint64_t ignore,
void *context)
ssize_t psmx_tagged_recv_no_flag_av_map(struct fid_ep *ep, void *buf,
size_t len, void *desc,
fi_addr_t src_addr,
uint64_t tag, uint64_t ignore,
void *context)
{
struct psmx_fid_ep *ep_priv;
psm_mq_req_t psm_req;
@ -144,11 +146,11 @@ ssize_t psmx_tagged_recvfrom_no_flag_av_map(struct fid_ep *ep, void *buf,
return 0;
}
ssize_t psmx_tagged_recvfrom_no_flag_av_table(struct fid_ep *ep, void *buf,
size_t len, void *desc,
fi_addr_t src_addr,
uint64_t tag, uint64_t ignore,
void *context)
ssize_t psmx_tagged_recv_no_flag_av_table(struct fid_ep *ep, void *buf,
size_t len, void *desc,
fi_addr_t src_addr,
uint64_t tag, uint64_t ignore,
void *context)
{
struct psmx_fid_ep *ep_priv;
struct psmx_fid_av *av;
@ -188,11 +190,11 @@ ssize_t psmx_tagged_recvfrom_no_flag_av_table(struct fid_ep *ep, void *buf,
return 0;
}
ssize_t psmx_tagged_recvfrom_no_event_av_map(struct fid_ep *ep, void *buf,
size_t len, void *desc,
fi_addr_t src_addr,
uint64_t tag, uint64_t ignore,
void *context)
ssize_t psmx_tagged_recv_no_event_av_map(struct fid_ep *ep, void *buf,
size_t len, void *desc,
fi_addr_t src_addr,
uint64_t tag, uint64_t ignore,
void *context)
{
struct psmx_fid_ep *ep_priv;
psm_mq_req_t psm_req;
@ -216,11 +218,11 @@ ssize_t psmx_tagged_recvfrom_no_event_av_map(struct fid_ep *ep, void *buf,
return psmx_errno(err);
}
ssize_t psmx_tagged_recvfrom_no_event_av_table(struct fid_ep *ep, void *buf,
size_t len, void *desc,
fi_addr_t src_addr,
uint64_t tag, uint64_t ignore,
void *context)
ssize_t psmx_tagged_recv_no_event_av_table(struct fid_ep *ep, void *buf,
size_t len, void *desc,
fi_addr_t src_addr,
uint64_t tag, uint64_t ignore,
void *context)
{
struct psmx_fid_ep *ep_priv;
struct psmx_fid_av *av;
@ -254,16 +256,16 @@ ssize_t psmx_tagged_recvfrom_no_event_av_table(struct fid_ep *ep, void *buf,
return psmx_errno(err);
}
static ssize_t psmx_tagged_recvfrom(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr,
uint64_t tag, uint64_t ignore, void *context)
static ssize_t psmx_tagged_recv(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr,
uint64_t tag, uint64_t ignore, void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
return _psmx_tagged_recvfrom(ep, buf, len, desc, src_addr, tag, ignore,
context, ep_priv->flags);
return _psmx_tagged_recv(ep, buf, len, desc, src_addr, tag, ignore,
context, ep_priv->flags);
}
static ssize_t psmx_tagged_recvmsg(struct fid_ep *ep, const struct fi_msg_tagged *msg,
@ -284,30 +286,14 @@ static ssize_t psmx_tagged_recvmsg(struct fid_ep *ep, const struct fi_msg_tagged
len = 0;
}
return _psmx_tagged_recvfrom(ep, buf, len,
msg->desc ? msg->desc[0] : NULL,
msg->addr, msg->tag, msg->ignore,
msg->context, flags);
}
static ssize_t psmx_tagged_recv(struct fid_ep *ep, void *buf, size_t len, void *desc,
uint64_t tag, uint64_t ignore, void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
if (ep_priv->connected)
return psmx_tagged_recvfrom(ep, buf, len, desc,
(fi_addr_t) ep_priv->peer_psm_epaddr,
tag, ignore, context);
else
return psmx_tagged_recvfrom(ep, buf, len, desc, 0,
tag, ignore, context);
return _psmx_tagged_recv(ep, buf, len,
msg->desc ? msg->desc[0] : NULL,
msg->addr, msg->tag, msg->ignore,
msg->context, flags);
}
static ssize_t psmx_tagged_recv_no_flag(struct fid_ep *ep, void *buf,
size_t len, void *desc,
size_t len, void *desc, fi_addr_t src_addr,
uint64_t tag, uint64_t ignore,
void *context)
{
@ -315,19 +301,13 @@ static ssize_t psmx_tagged_recv_no_flag(struct fid_ep *ep, void *buf,
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
if (ep_priv->connected)
return psmx_tagged_recvfrom_no_flag_av_map(
ep, buf, len, desc,
(fi_addr_t) ep_priv->peer_psm_epaddr,
tag, ignore, context);
else
return psmx_tagged_recvfrom_no_flag_av_map(
ep, buf, len, desc, 0,
return psmx_tagged_recv_no_flag_av_map(
ep, buf, len, desc, src_addr,
tag, ignore, context);
}
static ssize_t psmx_tagged_recv_no_event(struct fid_ep *ep, void *buf,
size_t len, void *desc,
size_t len, void *desc, fi_addr_t src_addr,
uint64_t tag, uint64_t ignore,
void *context)
{
@ -335,20 +315,14 @@ static ssize_t psmx_tagged_recv_no_event(struct fid_ep *ep, void *buf,
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
if (ep_priv->connected)
return psmx_tagged_recvfrom_no_event_av_map(
ep, buf, len, desc,
(fi_addr_t) ep_priv->peer_psm_epaddr,
tag, ignore, context);
else
return psmx_tagged_recvfrom_no_event_av_map(
ep, buf, len, desc, 0,
return psmx_tagged_recv_no_event_av_map(
ep, buf, len, desc, src_addr,
tag, ignore, context);
}
static ssize_t psmx_tagged_recvv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, uint64_t tag, uint64_t ignore,
void *context)
size_t count, fi_addr_t src_addr,
uint64_t tag, uint64_t ignore, void *context)
{
void *buf;
size_t len;
@ -366,12 +340,12 @@ static ssize_t psmx_tagged_recvv(struct fid_ep *ep, const struct iovec *iov, voi
}
return psmx_tagged_recv(ep, buf, len,
desc ? desc[0] : NULL, tag, ignore, context);
desc ? desc[0] : NULL, src_addr, tag, ignore, context);
}
static ssize_t psmx_tagged_recvv_no_flag(struct fid_ep *ep, const struct iovec *iov,
void **desc, size_t count, uint64_t tag,
uint64_t ignore, void *context)
void **desc, size_t count, fi_addr_t src_addr,
uint64_t tag, uint64_t ignore, void *context)
{
void *buf;
size_t len;
@ -389,13 +363,13 @@ static ssize_t psmx_tagged_recvv_no_flag(struct fid_ep *ep, const struct iovec *
}
return psmx_tagged_recv_no_flag(ep, buf, len,
desc ? desc[0] : NULL, tag, ignore,
context);
desc ? desc[0] : NULL, src_addr,
tag, ignore, context);
}
static ssize_t psmx_tagged_recvv_no_event(struct fid_ep *ep, const struct iovec *iov,
void **desc, size_t count, uint64_t tag,
uint64_t ignore, void *context)
void **desc, size_t count, fi_addr_t src_addr,
uint64_t tag, uint64_t ignore, void *context)
{
void *buf;
size_t len;
@ -413,13 +387,13 @@ static ssize_t psmx_tagged_recvv_no_event(struct fid_ep *ep, const struct iovec
}
return psmx_tagged_recv_no_event(ep, buf, len,
desc ? desc[0] : NULL, tag, ignore,
context);
desc ? desc[0] : NULL, src_addr,
tag, ignore, context);
}
ssize_t _psmx_tagged_sendto(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, uint64_t tag,
void *context, uint64_t flags)
ssize_t _psmx_tagged_send(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, uint64_t tag,
void *context, uint64_t flags)
{
struct psmx_fid_ep *ep_priv;
struct psmx_fid_av *av;
@ -432,6 +406,10 @@ ssize_t _psmx_tagged_sendto(struct fid_ep *ep, const void *buf, size_t len,
int err;
size_t idx;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
if (ep_priv->connected)
dest_addr = (fi_addr_t) ep_priv->peer_psm_epaddr;
if (flags & FI_TRIGGER) {
struct psmx_trigger *trigger;
struct fi_triggered_context *ctxt = context;
@ -457,8 +435,6 @@ ssize_t _psmx_tagged_sendto(struct fid_ep *ep, const void *buf, size_t len,
return 0;
}
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
if (tag & ep_priv->domain->reserved_tag_bits) {
fprintf(stderr, "%s: warning: using reserved tag bits."
"tag=%lx. reserved_bits=%lx.\n", __func__, tag,
@ -512,18 +488,16 @@ ssize_t _psmx_tagged_sendto(struct fid_ep *ep, const void *buf, size_t len,
if (err != PSM_OK)
return psmx_errno(err);
ep_priv->pending_sends++;
if (user_fi_context)
PSMX_CTXT_REQ(fi_context) = psm_req;
return 0;
}
ssize_t psmx_tagged_sendto_no_flag_av_map(struct fid_ep *ep, const void *buf,
size_t len, void *desc,
fi_addr_t dest_addr, uint64_t tag,
void *context)
ssize_t psmx_tagged_send_no_flag_av_map(struct fid_ep *ep, const void *buf,
size_t len, void *desc,
fi_addr_t dest_addr, uint64_t tag,
void *context)
{
struct psmx_fid_ep *ep_priv;
psm_epaddr_t psm_epaddr;
@ -548,15 +522,14 @@ ssize_t psmx_tagged_sendto_no_flag_av_map(struct fid_ep *ep, const void *buf,
if (err != PSM_OK)
return psmx_errno(err);
ep_priv->pending_sends++;
PSMX_CTXT_REQ(fi_context) = psm_req;
return 0;
}
ssize_t psmx_tagged_sendto_no_flag_av_table(struct fid_ep *ep, const void *buf,
size_t len, void *desc,
fi_addr_t dest_addr, uint64_t tag,
void *context)
ssize_t psmx_tagged_send_no_flag_av_table(struct fid_ep *ep, const void *buf,
size_t len, void *desc,
fi_addr_t dest_addr, uint64_t tag,
void *context)
{
struct psmx_fid_ep *ep_priv;
struct psmx_fid_av *av;
@ -588,15 +561,14 @@ ssize_t psmx_tagged_sendto_no_flag_av_table(struct fid_ep *ep, const void *buf,
if (err != PSM_OK)
return psmx_errno(err);
ep_priv->pending_sends++;
PSMX_CTXT_REQ(fi_context) = psm_req;
return 0;
}
ssize_t psmx_tagged_sendto_no_event_av_map(struct fid_ep *ep, const void *buf,
size_t len, void *desc,
fi_addr_t dest_addr, uint64_t tag,
void *context)
ssize_t psmx_tagged_send_no_event_av_map(struct fid_ep *ep, const void *buf,
size_t len, void *desc,
fi_addr_t dest_addr, uint64_t tag,
void *context)
{
struct psmx_fid_ep *ep_priv;
psm_epaddr_t psm_epaddr;
@ -618,14 +590,13 @@ ssize_t psmx_tagged_sendto_no_event_av_map(struct fid_ep *ep, const void *buf,
if (err != PSM_OK)
return psmx_errno(err);
ep_priv->pending_sends++;
return 0;
}
ssize_t psmx_tagged_sendto_no_event_av_table(struct fid_ep *ep, const void *buf,
size_t len, void *desc,
fi_addr_t dest_addr, uint64_t tag,
void *context)
ssize_t psmx_tagged_send_no_event_av_table(struct fid_ep *ep, const void *buf,
size_t len, void *desc,
fi_addr_t dest_addr, uint64_t tag,
void *context)
{
struct psmx_fid_ep *ep_priv;
struct psmx_fid_av *av;
@ -654,20 +625,19 @@ ssize_t psmx_tagged_sendto_no_event_av_table(struct fid_ep *ep, const void *buf,
if (err != PSM_OK)
return psmx_errno(err);
ep_priv->pending_sends++;
return 0;
}
static ssize_t psmx_tagged_sendto(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr,
uint64_t tag, void *context)
static ssize_t psmx_tagged_send(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr,
uint64_t tag, void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
return _psmx_tagged_sendto(ep, buf, len, desc, dest_addr, tag, context,
ep_priv->flags);
return _psmx_tagged_send(ep, buf, len, desc, dest_addr, tag, context,
ep_priv->flags);
}
static ssize_t psmx_tagged_sendmsg(struct fid_ep *ep, const struct fi_msg_tagged *msg,
@ -688,96 +658,13 @@ static ssize_t psmx_tagged_sendmsg(struct fid_ep *ep, const struct fi_msg_tagged
len = 0;
}
return _psmx_tagged_sendto(ep, buf, len,
return _psmx_tagged_send(ep, buf, len,
msg->desc ? msg->desc[0] : NULL, msg->addr,
msg->tag, msg->context, flags);
}
static ssize_t psmx_tagged_send(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t tag, void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (!ep_priv->connected)
return -ENOTCONN;
return psmx_tagged_sendto(ep, buf, len, desc, (fi_addr_t) ep_priv->peer_psm_epaddr,
tag, context);
}
static ssize_t psmx_tagged_send_no_flag_av_map(struct fid_ep *ep, const void *buf,
size_t len, void *desc, uint64_t tag,
void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (!ep_priv->connected)
return -ENOTCONN;
return psmx_tagged_sendto_no_flag_av_map(ep, buf, len, desc,
(fi_addr_t) ep_priv->peer_psm_epaddr,
tag, context);
}
static ssize_t psmx_tagged_send_no_flag_av_table(struct fid_ep *ep, const void *buf,
size_t len, void *desc, uint64_t tag,
void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (!ep_priv->connected)
return -ENOTCONN;
return psmx_tagged_sendto_no_flag_av_table(ep, buf, len, desc,
(fi_addr_t) ep_priv->peer_psm_epaddr,
tag, context);
}
static ssize_t psmx_tagged_send_no_event_av_map(struct fid_ep *ep, const void *buf,
size_t len, void *desc, uint64_t tag,
void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (!ep_priv->connected)
return -ENOTCONN;
return psmx_tagged_sendto_no_event_av_map(ep, buf, len, desc,
(fi_addr_t) ep_priv->peer_psm_epaddr,
tag, context);
}
static ssize_t psmx_tagged_send_no_event_av_table(struct fid_ep *ep, const void *buf,
size_t len, void *desc, uint64_t tag,
void *context)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (!ep_priv->connected)
return -ENOTCONN;
return psmx_tagged_sendto_no_event_av_table(ep, buf, len, desc,
(fi_addr_t) ep_priv->peer_psm_epaddr,
tag, context);
}
static ssize_t psmx_tagged_sendv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, uint64_t tag, void *context)
size_t count, fi_addr_t dest_addr, uint64_t tag, void *context)
{
void *buf;
size_t len;
@ -795,12 +682,13 @@ static ssize_t psmx_tagged_sendv(struct fid_ep *ep, const struct iovec *iov, voi
}
return psmx_tagged_send(ep, buf, len,
desc ? desc[0] : NULL, tag, context);
desc ? desc[0] : NULL, dest_addr, tag, context);
}
static ssize_t psmx_tagged_sendv_no_flag_av_map(struct fid_ep *ep, const struct iovec *iov,
void **desc, size_t count, uint64_t tag,
void *context)
void **desc, size_t count,
fi_addr_t dest_addr,
uint64_t tag, void *context)
{
void *buf;
size_t len;
@ -818,12 +706,14 @@ static ssize_t psmx_tagged_sendv_no_flag_av_map(struct fid_ep *ep, const struct
}
return psmx_tagged_send_no_flag_av_map(ep, buf, len,
desc ? desc[0] : NULL, tag, context);
desc ? desc[0] : NULL, dest_addr,
tag, context);
}
static ssize_t psmx_tagged_sendv_no_flag_av_table(struct fid_ep *ep, const struct iovec *iov,
void **desc, size_t count, uint64_t tag,
void *context)
void **desc, size_t count,
fi_addr_t dest_addr,
uint64_t tag, void *context)
{
void *buf;
size_t len;
@ -841,12 +731,14 @@ static ssize_t psmx_tagged_sendv_no_flag_av_table(struct fid_ep *ep, const struc
}
return psmx_tagged_send_no_flag_av_table(ep, buf, len,
desc ? desc[0] : NULL, tag, context);
desc ? desc[0] : NULL, dest_addr,
tag, context);
}
static ssize_t psmx_tagged_sendv_no_event_av_map(struct fid_ep *ep, const struct iovec *iov,
void **desc, size_t count, uint64_t tag,
void *context)
void **desc, size_t count,
fi_addr_t dest_addr,
uint64_t tag, void *context)
{
void *buf;
size_t len;
@ -864,12 +756,14 @@ static ssize_t psmx_tagged_sendv_no_event_av_map(struct fid_ep *ep, const struct
}
return psmx_tagged_send_no_event_av_map(ep, buf, len,
desc ? desc[0] : NULL, tag, context);
desc ? desc[0] : NULL, dest_addr,
tag, context);
}
static ssize_t psmx_tagged_sendv_no_event_av_table(struct fid_ep *ep, const struct iovec *iov,
void **desc, size_t count, uint64_t tag,
void *context)
void **desc, size_t count,
fi_addr_t dest_addr,
uint64_t tag, void *context)
{
void *buf;
size_t len;
@ -887,31 +781,19 @@ static ssize_t psmx_tagged_sendv_no_event_av_table(struct fid_ep *ep, const stru
}
return psmx_tagged_send_no_event_av_table(ep, buf, len,
desc ? desc[0] : NULL, tag, context);
}
static ssize_t psmx_tagged_injectto(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr, uint64_t tag)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
return _psmx_tagged_sendto(ep, buf, len, NULL, dest_addr, tag, NULL,
ep_priv->flags | FI_INJECT);
desc ? desc[0] : NULL,
dest_addr, tag, context);
}
static ssize_t psmx_tagged_inject(struct fid_ep *ep, const void *buf, size_t len,
uint64_t tag)
fi_addr_t dest_addr, uint64_t tag)
{
struct psmx_fid_ep *ep_priv;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
if (!ep_priv->connected)
return -ENOTCONN;
return psmx_tagged_injectto(ep, buf, len, (fi_addr_t) ep_priv->peer_psm_epaddr, tag);
return _psmx_tagged_send(ep, buf, len, NULL, dest_addr, tag, NULL,
ep_priv->flags | FI_INJECT);
}
static ssize_t psmx_tagged_search(struct fid_ep *ep, uint64_t *tag, uint64_t ignore,
@ -959,16 +841,12 @@ struct fi_ops_tagged psmx_tagged_ops = {
.size = sizeof(struct fi_ops_tagged),
.recv = psmx_tagged_recv,
.recvv = psmx_tagged_recvv,
.recvfrom = psmx_tagged_recvfrom,
.recvmsg = psmx_tagged_recvmsg,
.send = psmx_tagged_send,
.sendv = psmx_tagged_sendv,
.sendto = psmx_tagged_sendto,
.sendmsg = psmx_tagged_sendmsg,
.inject = psmx_tagged_inject,
.injectto = psmx_tagged_injectto,
.senddata = fi_no_tagged_senddata,
.senddatato = fi_no_tagged_senddatato,
.search = psmx_tagged_search,
};
@ -977,16 +855,12 @@ struct fi_ops_tagged psmx_tagged_ops_no_flag_av_map = {
.size = sizeof(struct fi_ops_tagged),
.recv = psmx_tagged_recv_no_flag,
.recvv = psmx_tagged_recvv_no_flag,
.recvfrom = psmx_tagged_recvfrom_no_flag_av_map,
.recvmsg = psmx_tagged_recvmsg,
.send = psmx_tagged_send_no_flag_av_map,
.sendv = psmx_tagged_sendv_no_flag_av_map,
.sendto = psmx_tagged_sendto_no_flag_av_map,
.sendmsg = psmx_tagged_sendmsg,
.inject = psmx_tagged_inject,
.injectto = psmx_tagged_injectto,
.senddata = fi_no_tagged_senddata,
.senddatato = fi_no_tagged_senddatato,
.search = psmx_tagged_search,
};
@ -995,16 +869,12 @@ struct fi_ops_tagged psmx_tagged_ops_no_flag_av_table = {
.size = sizeof(struct fi_ops_tagged),
.recv = psmx_tagged_recv_no_flag,
.recvv = psmx_tagged_recvv_no_flag,
.recvfrom = psmx_tagged_recvfrom_no_flag_av_table,
.recvmsg = psmx_tagged_recvmsg,
.send = psmx_tagged_send_no_flag_av_table,
.sendv = psmx_tagged_sendv_no_flag_av_table,
.sendto = psmx_tagged_sendto_no_flag_av_table,
.sendmsg = psmx_tagged_sendmsg,
.inject = psmx_tagged_inject,
.injectto = psmx_tagged_injectto,
.senddata = fi_no_tagged_senddata,
.senddatato = fi_no_tagged_senddatato,
.search = psmx_tagged_search,
};
@ -1013,16 +883,12 @@ struct fi_ops_tagged psmx_tagged_ops_no_event_av_map = {
.size = sizeof(struct fi_ops_tagged),
.recv = psmx_tagged_recv_no_event,
.recvv = psmx_tagged_recvv_no_event,
.recvfrom = psmx_tagged_recvfrom_no_event_av_map,
.recvmsg = psmx_tagged_recvmsg,
.send = psmx_tagged_send_no_event_av_map,
.sendv = psmx_tagged_sendv_no_event_av_map,
.sendto = psmx_tagged_sendto_no_event_av_map,
.sendmsg = psmx_tagged_sendmsg,
.inject = psmx_tagged_inject,
.injectto = psmx_tagged_injectto,
.senddata = fi_no_tagged_senddata,
.senddatato = fi_no_tagged_senddatato,
.search = psmx_tagged_search,
};
@ -1031,16 +897,12 @@ struct fi_ops_tagged psmx_tagged_ops_no_event_av_table = {
.size = sizeof(struct fi_ops_tagged),
.recv = psmx_tagged_recv_no_event,
.recvv = psmx_tagged_recvv_no_event,
.recvfrom = psmx_tagged_recvfrom_no_event_av_table,
.recvmsg = psmx_tagged_recvmsg,
.send = psmx_tagged_send_no_event_av_table,
.sendv = psmx_tagged_sendv_no_event_av_table,
.sendto = psmx_tagged_sendto_no_event_av_table,
.sendmsg = psmx_tagged_sendmsg,
.inject = psmx_tagged_inject,
.injectto = psmx_tagged_injectto,
.senddata = fi_no_tagged_senddata,
.senddatato = fi_no_tagged_senddatato,
.search = psmx_tagged_search,
};
@ -1049,16 +911,12 @@ struct fi_ops_tagged psmx_tagged_ops_no_send_event_av_map = {
.size = sizeof(struct fi_ops_tagged),
.recv = psmx_tagged_recv_no_flag,
.recvv = psmx_tagged_recvv_no_flag,
.recvfrom = psmx_tagged_recvfrom_no_flag_av_map,
.recvmsg = psmx_tagged_recvmsg,
.send = psmx_tagged_send_no_event_av_map,
.sendv = psmx_tagged_sendv_no_event_av_map,
.sendto = psmx_tagged_sendto_no_event_av_map,
.sendmsg = psmx_tagged_sendmsg,
.inject = psmx_tagged_inject,
.injectto = psmx_tagged_injectto,
.senddata = fi_no_tagged_senddata,
.senddatato = fi_no_tagged_senddatato,
.search = psmx_tagged_search,
};
@ -1067,16 +925,12 @@ struct fi_ops_tagged psmx_tagged_ops_no_send_event_av_table = {
.size = sizeof(struct fi_ops_tagged),
.recv = psmx_tagged_recv_no_flag,
.recvv = psmx_tagged_recvv_no_flag,
.recvfrom = psmx_tagged_recvfrom_no_flag_av_table,
.recvmsg = psmx_tagged_recvmsg,
.send = psmx_tagged_send_no_event_av_table,
.sendv = psmx_tagged_sendv_no_event_av_table,
.sendto = psmx_tagged_sendto_no_event_av_table,
.sendmsg = psmx_tagged_sendmsg,
.inject = psmx_tagged_inject,
.injectto = psmx_tagged_injectto,
.senddata = fi_no_tagged_senddata,
.senddatato = fi_no_tagged_senddatato,
.search = psmx_tagged_search,
};
@ -1085,16 +939,12 @@ struct fi_ops_tagged psmx_tagged_ops_no_recv_event_av_map = {
.size = sizeof(struct fi_ops_tagged),
.recv = psmx_tagged_recv_no_event,
.recvv = psmx_tagged_recvv_no_event,
.recvfrom = psmx_tagged_recvfrom_no_event_av_map,
.recvmsg = psmx_tagged_recvmsg,
.send = psmx_tagged_send_no_flag_av_map,
.sendv = psmx_tagged_sendv_no_flag_av_map,
.sendto = psmx_tagged_sendto_no_flag_av_map,
.sendmsg = psmx_tagged_sendmsg,
.inject = psmx_tagged_inject,
.injectto = psmx_tagged_injectto,
.senddata = fi_no_tagged_senddata,
.senddatato = fi_no_tagged_senddatato,
.search = psmx_tagged_search,
};
@ -1103,16 +953,12 @@ struct fi_ops_tagged psmx_tagged_ops_no_recv_event_av_table = {
.size = sizeof(struct fi_ops_tagged),
.recv = psmx_tagged_recv_no_event,
.recvv = psmx_tagged_recvv_no_event,
.recvfrom = psmx_tagged_recvfrom_no_event_av_table,
.recvmsg = psmx_tagged_recvmsg,
.send = psmx_tagged_send_no_flag_av_table,
.sendv = psmx_tagged_sendv_no_flag_av_table,
.sendto = psmx_tagged_sendto_no_flag_av_table,
.sendmsg = psmx_tagged_sendmsg,
.inject = psmx_tagged_inject,
.injectto = psmx_tagged_injectto,
.senddata = fi_no_tagged_senddata,
.senddatato = fi_no_tagged_senddatato,
.search = psmx_tagged_search,
};

Просмотреть файл

@ -150,7 +150,6 @@ static struct fi_ops psmx_fi_ops = {
.size = sizeof(struct fi_ops),
.close = psmx_wait_close,
.bind = fi_no_bind,
.sync = fi_no_sync,
.control = fi_no_control,
.ops_open = fi_no_ops_open,
};
@ -237,6 +236,7 @@ int psmx_wait_open(struct fid_domain *domain, struct fi_wait_attr *attr,
wait_priv->wait.fid.context = 0;
wait_priv->wait.fid.ops = &psmx_fi_ops;
wait_priv->wait.ops = &psmx_wait_ops;
wait_priv->domain = domain_priv;
*waitset = &wait_priv->wait;
return 0;

Просмотреть файл

@ -64,23 +64,20 @@
#define SOCK_EP_MAX_ORDER_WAW_SZ (0)
#define SOCK_EP_MEM_TAG_FMT (0)
#define SOCK_EP_MSG_ORDER (0)
#define SOCK_EP_TX_CTX_CNT (0)
#define SOCK_EP_RX_CTX_CNT (0)
#define SOCK_EP_MAX_EP_CNT (128)
#define SOCK_EP_MAX_TX_CNT (16)
#define SOCK_EP_MAX_RX_CNT (16)
#define SOCK_EP_MAX_IOV_LIMIT (8)
#define SOCK_EP_MAX_TX_CTX_SZ (1<<12)
#define SOCK_EP_BACKLOG (8)
#define SOCK_EP_SNDQ_LEN (128)
#define SOCK_EP_RCVQ_LEN (128)
#define SOCK_PE_POLL_TIMEOUT (100000)
#define SOCK_PE_MAX_ENTRIES (128)
#define SOCK_EQ_DEF_LEN (128)
#define SOCK_CQ_DEF_LEN (128)
#define SOCK_EP_CAP ( FI_MSG | \
FI_INJECT | \
FI_SOURCE | \
FI_SEND | FI_RECV | \
FI_CANCEL )
#define SOCK_EQ_DEF_SZ (1<<12)
#define SOCK_CQ_DEF_SZ (1<<12)
#define SOCK_EP_RDM_CAP (FI_MSG | FI_INJECT | FI_SOURCE | FI_SEND | FI_RECV)
#define SOCK_EP_DGRAM_CAP (FI_MSG | FI_INJECT | FI_SOURCE | FI_SEND | FI_RECV)
#define SOCK_OPS_CAP (FI_INJECT | FI_SEND | FI_RECV )
#define SOCK_MAJOR_VERSION 1
@ -94,12 +91,21 @@ struct sock_fabric{
atomic_t ref;
};
struct sock_conn {
int sock_fd;
};
struct sock_domain {
struct fi_info info;
struct fid_domain dom_fid;
uint64_t mode;
struct sock_fabric *fab;
fastlock_t lock;
atomic_t ref;
struct sock_eq *eq;
struct sock_eq *mr_eq;
struct sock_pe *pe;
struct index_map mr_idm;
};
@ -109,6 +115,7 @@ struct sock_cntr {
uint64_t value;
uint64_t threshold;
atomic_t ref;
atomic_t err_cnt;
pthread_cond_t cond;
pthread_mutex_t mut;
};
@ -197,10 +204,14 @@ struct sock_comm_item{
enum {
SOCK_OP_SEND,
SOCK_OP_RECV,
SOCK_OP_WRITE,
SOCK_OP_READ,
SOCK_OP_TSEND,
SOCK_OP_ATOMIC
SOCK_OP_TRECV,
SOCK_OP_ATOMIC,
SOCK_OP_SEND_INJECT,
SOCK_OP_TSEND_INJECT,
};
/*
@ -210,7 +221,7 @@ enum {
* data - only present if flags indicate
* tag - only present for TSEND op
*/
struct sock_tx_op {
struct sock_op {
uint8_t op;
uint8_t src_iov_len;
uint8_t dest_iov_len;
@ -223,6 +234,23 @@ struct sock_tx_op {
};
};
struct sock_op_send {
struct sock_op op;
uint64_t flags;
uint64_t context;
uint64_t dest_addr;
struct sock_conn *conn;
};
struct sock_op_tsend {
struct sock_op op;
uint64_t flags;
uint64_t context;
uint64_t dest_addr;
struct sock_conn *conn;
uint64_t tag;
};
union sock_iov {
struct fi_rma_iov iov;
struct fi_rma_ioc ioc;
@ -253,55 +281,67 @@ struct sock_eq{
};
struct sock_ep {
struct fid_ep ep;
struct sock_domain *domain;
struct fid_ep ep;
uint8_t enabled;
uint8_t connected;
uint8_t send_cq_event;
uint8_t recv_cq_event;
uint8_t read_cq_event;
uint8_t write_cq_event;
uint8_t rem_read_cq_event;
uint8_t rem_write_cq_event;
int sock_fd;
atomic_t ref;
struct sock_eq *eq;
struct sock_av *av;
struct sock_eq *eq;
struct sock_av *av;
struct sock_domain *domain;
struct sock_cq *send_cq;
struct sock_cq *recv_cq;
int send_cq_event_flag;
int recv_cq_event_flag;
struct sock_cq *send_cq;
struct sock_cq *recv_cq;
struct sock_cq *read_cq;
struct sock_cq *write_cq;
struct sock_cq *rem_read_cq;
struct sock_cq *rem_write_cq;
struct sock_cntr *send_cntr;
struct sock_cntr *recv_cntr;
struct sock_cntr *read_cntr;
struct sock_cntr *write_cntr;
struct sock_cntr *rem_read_cntr;
struct sock_cntr *rem_write_cntr;
uint64_t out_send;
uint64_t out_tagged_send;
uint64_t out_rma_put;
uint64_t out_rma_get;
struct sock_cntr *send_cntr;
struct sock_cntr *recv_cntr;
struct sock_cntr *read_cntr;
struct sock_cntr *write_cntr;
struct sock_cntr *rem_read_cntr;
struct sock_cntr *rem_write_cntr;
uint64_t cmpl_send;
uint64_t cmpl_tagged_send;
uint64_t cmpl_rma_put;
uint64_t cmpl_rma_get;
struct sock_rx_ctx *rx_ctx;
struct sock_tx_ctx *tx_ctx;
struct sock_rx_ctx **rx_array;
struct sock_tx_ctx **tx_array;
atomic_t num_rx_ctx;
atomic_t num_tx_ctx;
struct dlist_entry rx_ctx_entry;
struct dlist_entry tx_ctx_entry;
struct fi_info info;
struct fi_ep_attr ep_attr;
list_t *send_list;
list_t *recv_list;
struct sockaddr src_addr;
struct sockaddr dest_addr;
struct fi_tx_ctx_attr tx_attr;
struct fi_rx_ctx_attr rx_attr;
enum fi_ep_type ep_type;
struct sockaddr_in *src_addr;
struct sockaddr_in *dest_addr;
int connected;
int enabled;
int is_alias;
/* TODO: remove */
struct sock_ep *next;
struct sock_ep *prev;
struct sock_ep *alias;
struct sock_ep *base;
list_t *send_list;
list_t *recv_list;
int port_num;
};
@ -320,41 +360,112 @@ struct sock_pep {
uint64_t pep_cap;
};
struct sock_rx_entry {
struct sock_op rx_op;
uint64_t flags;
uint64_t context;
uint64_t addr;
uint64_t data;
uint64_t tag;
uint64_t ignore;
union sock_iov iov[SOCK_EP_MAX_IOV_LIMIT];
struct dlist_entry entry;
};
struct sock_rx_ctx {
struct fid_ep ctx;
uint16_t rx_id;
uint8_t reserved[6];
uint8_t enabled;
uint8_t progress;
uint8_t recv_cq_event;
uint8_t rem_read_cq_event;
uint8_t rem_write_cq_event;
uint8_t reserved[1];
uint64_t addr;
struct sock_cq *cq;
struct sock_ep *ep;
struct sock_cq *recv_cq;
struct sock_cq *rem_read_cq;
struct sock_cq *rem_write_cq;
struct sock_ep *ep;
struct sock_domain *domain;
struct sock_cntr *recv_cntr;
struct sock_cntr *rem_read_cntr;
struct sock_cntr *rem_write_cntr;
struct dlist_entry ep_entry;
struct dlist_entry cq_entry;
struct dlist_entry pe_entry;
struct dlist_entry pe_entry_list;
struct dlist_entry rx_entry_list;
struct dlist_entry ep_list;
fastlock_t lock;
struct fi_rx_ctx_attr attr;
};
struct sock_tx_ctx {
struct fid_ep ctx;
struct ringbuffd rbfd;
fastlock_t wlock;
fastlock_t rlock;
uint16_t tx_id;
uint8_t reserved[6];
uint8_t enabled;
uint8_t progress;
uint8_t send_cq_event;
uint8_t read_cq_event;
uint8_t write_cq_event;
uint8_t reserved[1];
uint64_t addr;
struct sock_cq *cq;
struct sock_ep *ep;
struct sock_cq *send_cq;
struct sock_cq *read_cq;
struct sock_cq *write_cq;
struct sock_ep *ep;
struct sock_domain *domain;
struct sock_cntr *send_cntr;
struct sock_cntr *read_cntr;
struct sock_cntr *write_cntr;
struct dlist_entry ep_entry;
struct dlist_entry cq_entry;
struct dlist_entry pe_entry;
struct dlist_entry pe_entry_list;
struct dlist_entry ep_list;
fastlock_t lock;
struct fi_tx_ctx_attr attr;
};
#define SOCK_WIRE_PROTO_VERSION (0)
struct sock_msg_hdr{
uint8_t version;
uint8_t op_type;
uint16_t rx_id;
uint8_t reserved[4];
uint64_t src_addr;
uint64_t flags;
uint64_t msg_len;
};
struct sock_msg_send{
struct sock_msg_hdr msg_hdr;
/* data */
/* user data */
};
struct sock_tx_iov {
@ -363,7 +474,7 @@ struct sock_tx_iov {
};
struct sock_tx_pe_entry{
struct sock_tx_op tx_op;
struct sock_op tx_op;
uint8_t header_sent;
uint8_t reserved[7];
@ -374,7 +485,7 @@ struct sock_tx_pe_entry{
};
struct sock_rx_pe_entry{
struct sock_tx_op rx_op;
struct sock_op rx_op;
void *raw_data;
union sock_iov rx_iov[SOCK_EP_MAX_IOV_LIMIT];
};
@ -391,6 +502,8 @@ struct sock_pe_entry{
struct sock_rx_pe_entry rx;
};
struct sock_msg_hdr msg_hdr;
uint64_t flags;
uint64_t context;
uint64_t addr;
@ -427,14 +540,17 @@ struct sock_cq {
sock_cq_report_fn report_completion;
};
int sock_verify_info(struct fi_info *hints);
int sock_verify_fabric_attr(struct fi_fabric_attr *attr);
int sock_verify_domain_attr(struct fi_domain_attr *attr);
int sock_rdm_verify_ep_attr(struct fi_ep_attr *ep_attr,
struct fi_tx_ctx_attr *tx_attr,
struct fi_rx_ctx_attr *rx_attr);
int _sock_verify_info(struct fi_info *hints);
int _sock_verify_ep_attr(struct fi_ep_attr *attr);
int _sock_verify_fabric_attr(struct fi_fabric_attr *attr);
int _sock_verify_domain_attr(struct fi_domain_attr *attr);
int sock_rdm_getinfo(uint32_t version, const char *node, const char *service,
uint64_t flags, struct fi_info *hints, struct fi_info **info);
int sock_dgram_getinfo(uint32_t version, const char *node, const char *service,
uint64_t flags, struct fi_info *hints, struct fi_info **info);
@ -446,6 +562,8 @@ int sock_domain(struct fid_fabric *fabric, struct fi_info *info,
int sock_av_open(struct fid_domain *domain, struct fi_av_attr *attr,
struct fid_av **av, void *context);
fi_addr_t _sock_av_lookup(struct sock_av *av, struct sockaddr *addr);
int sock_av_lookup_addr(struct sock_av *av, fi_addr_t addr,
struct sock_conn **entry);
int sock_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
@ -478,13 +596,18 @@ int sock_ep_connect(struct fid_ep *ep, const void *addr,
const void *param, size_t paramlen);
struct sock_rx_ctx *sock_rx_ctx_alloc();
struct sock_rx_ctx *sock_rx_ctx_alloc(struct fi_rx_ctx_attr *attr,
void *context);
void sock_rx_ctx_add_ep(struct sock_rx_ctx *rx_ctx, struct sock_ep *ep);
void sock_rx_ctx_free(struct sock_rx_ctx *rx_ctx);
struct sock_tx_ctx *sock_tx_ctx_alloc(size_t size);
struct sock_tx_ctx *sock_tx_ctx_alloc(struct fi_tx_ctx_attr *attr,
void *context);
void sock_tx_ctx_add_ep(struct sock_tx_ctx *tx_ctx, struct sock_ep *ep);
void sock_tx_ctx_free(struct sock_tx_ctx *tx_ctx);
void sock_tx_ctx_start(struct sock_tx_ctx *tx_ctx);
int sock_tx_ctx_write(struct sock_tx_ctx *tx_ctx, const void *buf, size_t len);
void sock_tx_ctx_write(struct sock_tx_ctx *tx_ctx, const void *buf, size_t len);
void sock_tx_ctx_commit(struct sock_tx_ctx *tx_ctx);
void sock_tx_ctx_abort(struct sock_tx_ctx *tx_ctx);
int sock_tx_ctx_read(struct sock_tx_ctx *tx_ctx, void *buf, size_t len);
@ -495,6 +618,12 @@ int sock_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr,
int sock_wait_open(struct fid_domain *domain, struct fi_wait_attr *attr,
struct fid_wait **waitset);
struct sock_pe *sock_pe_init(struct sock_domain *domain);
int sock_pe_add_tx_ctx(struct sock_pe *pe, struct sock_tx_ctx *ctx);
int sock_pe_add_rx_ctx(struct sock_pe *pe, struct sock_rx_ctx *ctx);
int sock_pe_progress_rx_ctx(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx);
int sock_pe_progress_tx_ctx(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx);
void sock_pe_finalize(struct sock_pe *pe);
void free_fi_info(struct fi_info *info);

Просмотреть файл

@ -161,7 +161,6 @@ static struct fi_ops sock_av_fi_ops = {
.size = sizeof(struct fi_ops),
.close = sock_av_close,
.bind = sock_av_bind,
.sync = fi_no_sync,
.control = fi_no_control,
.ops_open = fi_no_ops_open,
};
@ -272,3 +271,10 @@ fi_addr_t _sock_av_lookup(struct sock_av *av, struct sockaddr *addr)
}
return FI_ADDR_UNSPEC;
}
/* place holder */
int sock_av_lookup_addr(struct sock_av *av, fi_addr_t addr,
struct sock_conn **entry)
{
return -FI_ENOSYS;
}

Просмотреть файл

@ -50,6 +50,23 @@ static uint64_t sock_cntr_read(struct fid_cntr *cntr)
return _cntr->value;
}
int sock_cntr_inc(struct sock_cntr *cntr)
{
pthread_mutex_lock(&cntr->mut);
cntr->value += 1;
if (cntr->value >= cntr->threshold)
pthread_cond_signal(&cntr->cond);
pthread_mutex_unlock(&cntr->mut);
return 0;
}
int sock_cntr_err_inc(struct sock_cntr *cntr)
{
atomic_inc(&cntr->err_cnt);
pthread_cond_signal(&cntr->cond);
return 0;
}
static int sock_cntr_add(struct fid_cntr *cntr, uint64_t value)
{
struct sock_cntr *_cntr;
@ -106,8 +123,16 @@ static int sock_cntr_close(struct fid *fid)
return 0;
}
uint64_t sock_cntr_readerr(struct fid_cntr *cntr)
{
struct sock_cntr *_cntr;
_cntr = container_of(cntr, struct sock_cntr, cntr_fid);
return atomic_get(&_cntr->err_cnt);
}
static struct fi_ops_cntr sock_cntr_ops = {
.size = sizeof(struct fi_ops_cntr),
.readerr = sock_cntr_readerr,
.read = sock_cntr_read,
.add = sock_cntr_add,
.set = sock_cntr_set,
@ -143,6 +168,8 @@ int sock_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
goto err2;
atomic_init(&_cntr->ref, 0);
atomic_init(&_cntr->err_cnt, 0);
_cntr->cntr_fid.fid.fclass = FI_CLASS_CNTR;
_cntr->cntr_fid.fid.context = context;
_cntr->cntr_fid.fid.ops = &sock_cntr_fi_ops;

Просмотреть файл

@ -73,7 +73,7 @@ static ssize_t sock_cq_entry_size(struct sock_cq *sock_cq)
case FI_CQ_FORMAT_UNSPEC:
default:
size = -1;
sock_debug(SOCK_ERROR, "CQ: Invalid CQ format\n");
SOCK_LOG_ERROR("CQ: Invalid CQ format\n");
break;
}
return size;
@ -188,7 +188,7 @@ static void sock_cq_set_report_fn(struct sock_cq *sock_cq)
case FI_CQ_FORMAT_UNSPEC:
default:
sock_debug(SOCK_ERROR, "CQ: Invalid CQ format\n");
SOCK_LOG_ERROR("CQ: Invalid CQ format\n");
break;
}
}
@ -254,15 +254,12 @@ ssize_t sock_cq_read(struct fid_cq *cq, void *buf, size_t count)
ssize_t sock_cq_readerr(struct fid_cq *cq, struct fi_cq_err_entry *buf,
size_t len, uint64_t flags)
uint64_t flags)
{
ssize_t num_read;
struct sock_cq *sock_cq;
sock_cq = container_of(cq, struct sock_cq, cq_fid);
if(len < sizeof(struct fi_cq_err_entry))
return -FI_ETOOSMALL;
num_read = 0;
fastlock_acquire(&sock_cq->lock);
@ -301,7 +298,7 @@ ssize_t sock_cq_writeerr(struct fid_cq *cq, struct fi_cq_err_entry *buf,
}
const char * sock_cq_strerror(struct fid_cq *cq, int prov_errno,
const void *err_data, void *buf, size_t len)
const void *err_data, char *buf, size_t len)
{
if (buf && len)
return strncpy(buf, strerror(prov_errno), len);
@ -373,7 +370,7 @@ static int sock_cq_verify_attr(struct fi_cq_attr *attr)
}
static struct fi_cq_attr _sock_cq_def_attr = {
.size = SOCK_CQ_DEF_LEN,
.size = SOCK_CQ_DEF_SZ,
.flags = 0,
.format = FI_CQ_FORMAT_CONTEXT,
.wait_obj = FI_WAIT_FD,

Просмотреть файл

@ -38,33 +38,46 @@
#include <string.h>
#include "sock.h"
#include "sock_util.h"
struct sock_rx_ctx *sock_rx_ctx_alloc()
struct sock_rx_ctx *sock_rx_ctx_alloc(struct fi_rx_ctx_attr *attr, void *context)
{
struct sock_rx_ctx *rx_ctx;
rx_ctx = calloc(1, sizeof(*rx_ctx));
if(!rx_ctx)
if (!rx_ctx)
return NULL;
dlist_init(&rx_ctx->ep_entry);
dlist_init(&rx_ctx->cq_entry);
dlist_init(&rx_ctx->pe_entry);
dlist_init(&rx_ctx->pe_entry_list);
dlist_init(&rx_ctx->rx_entry_list);
dlist_init(&rx_ctx->ep_list);
fastlock_init(&rx_ctx->lock);
rx_ctx->ctx.fid.fclass = FI_CLASS_RX_CTX;
rx_ctx->ctx.fid.context = context;
rx_ctx->attr = *attr;
return rx_ctx;
}
void sock_rx_ctx_add_ep(struct sock_rx_ctx *rx_ctx, struct sock_ep *ep)
{
fastlock_acquire(&rx_ctx->lock);
dlist_insert_tail(&ep->rx_ctx_entry, &rx_ctx->ep_list);
atomic_inc(&ep->num_rx_ctx);
fastlock_release(&rx_ctx->lock);
}
void sock_rx_ctx_free(struct sock_rx_ctx *rx_ctx)
{
fastlock_destroy(&rx_ctx->lock);
free(rx_ctx);
}
struct sock_tx_ctx *sock_tx_ctx_alloc(size_t size)
struct sock_tx_ctx *sock_tx_ctx_alloc(struct fi_tx_ctx_attr *attr, void *context)
{
struct sock_tx_ctx *tx_ctx;
@ -72,23 +85,36 @@ struct sock_tx_ctx *sock_tx_ctx_alloc(size_t size)
if (!tx_ctx)
return NULL;
if (rbfdinit(&tx_ctx->rbfd, size))
if (rbfdinit(&tx_ctx->rbfd, attr->size))
goto err;
dlist_init(&tx_ctx->ep_entry);
dlist_init(&tx_ctx->cq_entry);
dlist_init(&tx_ctx->pe_entry);
dlist_init(&tx_ctx->pe_entry_list);
dlist_init(&tx_ctx->ep_list);
fastlock_init(&tx_ctx->rlock);
fastlock_init(&tx_ctx->wlock);
tx_ctx->ctx.fid.fclass = FI_CLASS_TX_CTX;
tx_ctx->ctx.fid.context = context;
tx_ctx->attr = *attr;
return tx_ctx;
err:
free(tx_ctx);
return NULL;
}
void sock_tx_ctx_add_ep(struct sock_tx_ctx *tx_ctx, struct sock_ep *ep)
{
fastlock_acquire(&tx_ctx->lock);
dlist_insert_tail(&ep->tx_ctx_entry, &tx_ctx->ep_list);
atomic_inc(&ep->num_tx_ctx);
fastlock_release(&tx_ctx->lock);
}
void sock_tx_ctx_free(struct sock_tx_ctx *tx_ctx)
{
fastlock_destroy(&tx_ctx->rlock);
@ -102,13 +128,9 @@ void sock_tx_ctx_start(struct sock_tx_ctx *tx_ctx)
fastlock_acquire(&tx_ctx->wlock);
}
int sock_tx_ctx_write(struct sock_tx_ctx *tx_ctx, const void *buf, size_t len)
void sock_tx_ctx_write(struct sock_tx_ctx *tx_ctx, const void *buf, size_t len)
{
if (rbfdavail(&tx_ctx->rbfd) < len)
return -FI_EAGAIN;
rbfdwrite(&tx_ctx->rbfd, buf, len);
return 0;
}
void sock_tx_ctx_commit(struct sock_tx_ctx *tx_ctx)

Просмотреть файл

@ -78,7 +78,7 @@ int sockd_check_hints(struct fi_info *hints)
case FI_EP_DGRAM:
break;
default:
sock_debug(SOCK_ERROR,"[sockd] %s: hints->type = %d, only FI_EP_DGRAM = %d is supported\n",
SOCK_LOG_ERROR("[sockd] %s: hints->type = %d, only FI_EP_DGRAM = %d is supported\n",
__func__, hints->ep_type, FI_EP_DGRAM);
return -FI_ENODATA;
}
@ -89,7 +89,7 @@ int sockd_check_hints(struct fi_info *hints)
case FI_SOCKADDR_IN6:
break;
default:
sock_debug(SOCK_ERROR,"[sockd] %s: hints->addr_format = %d, supported = FI_SOCKADDR or FI_SOCKADDR_IN or FI_SOCKADDR_IN6\n",
SOCK_LOG_ERROR("[sockd] %s: hints->addr_format = %d, supported = FI_SOCKADDR or FI_SOCKADDR_IN or FI_SOCKADDR_IN6\n",
__func__, hints->addr_format);
return -FI_ENODATA;
}
@ -99,23 +99,31 @@ int sockd_check_hints(struct fi_info *hints)
case FI_PROTO_UNSPEC:
break;
default:
sock_debug(SOCK_ERROR,"[sockd] %s: hints->ep_attr->protocol=%lu, supported=%d\n",
/*
SOCK_LOG_ERROR("[sockd] %s: hints->ep_attr->protocol=%lu, supported=%d\n",
__func__, hints->ep_attr->protocol, FI_PROTO_UNSPEC);
*/
return -FI_ENODATA;
}
if (hints->ep_attr->max_msg_size > SOCKD_MTU) {
sock_debug(SOCK_ERROR,"[sockd] %s: hints->ep_attr->max_msg_size=%d, supported=%d\n",
/*
SOCK_LOG_ERROR("[sockd] %s: hints->ep_attr->max_msg_size=%d, supported=%d\n",
__func__, hints->ep_attr->max_msg_size, SOCKD_MTU);
*/
return -FI_ENODATA;
}
if (hints->ep_attr->inject_size > SOCKD_MTU) {
sock_debug(SOCK_ERROR,"[sockd] %s: hints->ep_attr->inject_size=%d, supported=%d\n",
/*
SOCK_LOG_ERROR("[sockd] %s: hints->ep_attr->inject_size=%d, supported=%d\n",
__func__, hints->ep_attr->inject_size, SOCKD_MTU);
*/
return -FI_ENODATA;
}
if (hints->ep_attr->total_buffered_recv > so_rcvbuf) {
sock_debug(SOCK_ERROR,"[sockd] %s: hints->ep_attr->total_buffered_recv=%d, supported=%d\n",
/*
SOCK_LOG_ERROR("[sockd] %s: hints->ep_attr->total_buffered_recv=%d, supported=%d\n",
__func__, hints->ep_attr->total_buffered_recv, so_rcvbuf);
*/
return -FI_ENODATA;
}
/* FIXME: check
@ -126,21 +134,25 @@ int sockd_check_hints(struct fi_info *hints)
* msg_order */
}
if ((hints->caps & SOCK_EP_CAP) != hints->caps) {
sock_debug(SOCK_ERROR,"[sockd] %s: hints->ep_cap=0x%llx, supported=0x%llx\n",
__func__, hints->caps, SOCK_EP_CAP);
if ((hints->caps & SOCK_EP_DGRAM_CAP) != hints->caps) {
/*
SOCK_LOG_ERROR("[sockd] %s: hints->ep_cap=0x%llx, supported=0x%llx\n",
__func__, hints->caps, SOCK_EP_DGRAM_CAP);
*/
return -FI_ENODATA;
}
if (hints->tx_attr && ((hints->tx_attr->op_flags & SOCKD_OP_FLAGS) != hints->tx_attr->op_flags)) {
sock_debug(SOCK_ERROR,"[sockd] %s: hints->tx_attr->op_flags=0x%llx, supported=0x%llx\n",
/*
SOCK_LOG_ERROR("[sockd] %s: hints->tx_attr->op_flags=0x%llx, supported=0x%llx\n",
__func__, hints->tx_attr->op_flags, SOCKD_OP_FLAGS);
*/
return -FI_ENODATA;
}
#if 0 /* TODO */
if ((hints->domain_cap & SOCKD_DOMAIN_CAP) != hints->domain_cap) {
sock_debug(SOCK_ERROR,"[sockd] %s: hints->domain_cap=0x%llx, supported=0x%llx\n",
SOCK_LOG_ERROR("[sockd] %s: hints->domain_cap=0x%llx, supported=0x%llx\n",
__func__, hints->domain_cap, SOCKD_DOMAIN_CAP);
return -FI_ENODATA;
/* FIXME: check
@ -154,15 +166,15 @@ int sockd_check_hints(struct fi_info *hints)
struct sockaddr_in *si_src;
if (!hints->src_addr || !hints->src_addrlen) {
sock_debug(SOCK_ERROR,"[sockd] src_addr and src_addrlen are required from hints\n");
SOCK_LOG_ERROR("[sockd] src_addr and src_addrlen are required from hints\n");
return -FI_ENODATA;
} else {
si_src = (struct sockaddr_in *)(hints->src_addr);
if (ntohs(si_src->sin_port)<1024) {
sock_debug(SOCK_ERROR,"[sockd] port number should be above 1023\n");
SOCK_LOG_ERROR("[sockd] port number should be above 1023\n");
return -FI_ENODATA;
}
sock_debug(SOCK_ERROR,"[sockd] port is set to %d\n", ntohs(si_src->sin_port));
SOCK_LOG_ERROR("[sockd] port is set to %d\n", ntohs(si_src->sin_port));
}
return 0;
@ -188,7 +200,7 @@ static struct fi_info* sockd_dupinfo(struct fi_info *hints)
fi->caps = hints->caps;
fi->addr_format = hints->addr_format;
} else {
fi->caps = SOCK_EP_CAP;
fi->caps = SOCK_EP_DGRAM_CAP;
fi->addr_format = FI_SOCKADDR;
}
@ -240,7 +252,7 @@ static struct fi_info* sockd_dupinfo(struct fi_info *hints)
memcpy(fi->src_addr, hints->src_addr, hints->src_addrlen);
fi->src_addrlen = hints->src_addrlen;
} else {
sock_debug(SOCK_ERROR,"[sockd] hints must have src_addr\n");
SOCK_LOG_ERROR("[sockd] hints must have src_addr\n");
#if 0
fi->src_addr = NULL;
fi->src_addrlen = 0;
@ -314,7 +326,7 @@ int sock_dgram_getinfo(uint32_t version, const char *node, const char *service,
};
ret = getaddrinfo(node, service, &sock_hints, &res);
if (ret) {
sock_debug(SOCK_ERROR,"%s: couldn't getaddrinfo for (%s:%s):%s\n", __func__, node, service, gai_strerror(ret));
SOCK_LOG_ERROR("%s: couldn't getaddrinfo for (%s:%s):%s\n", __func__, node, service, gai_strerror(ret));
return -FI_ENODATA;
}
freeaddrinfo(res);
@ -322,7 +334,7 @@ int sock_dgram_getinfo(uint32_t version, const char *node, const char *service,
sockfd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
if (sockfd < 0) {
sock_debug(SOCK_ERROR,"%s: couldn't open DGRAM socket\n", __func__);
SOCK_LOG_ERROR("%s: couldn't open DGRAM socket\n", __func__);
return -FI_ENODATA;
}
@ -358,7 +370,7 @@ static int sockd_ep_close(fid_t fid)
ep = container_of(fid, struct sock_ep, ep.fid);
if (ep->sock_fd)
if (close(ep->sock_fd)) {
sock_debug(SOCK_ERROR,"[sockd] cannot close sock_fd\n");
SOCK_LOG_ERROR("[sockd] cannot close sock_fd\n");
return -FI_ENODATA;
}
@ -378,11 +390,11 @@ static int sockd_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
switch (bfid->fclass) {
case FI_CLASS_CNTR:
sock_debug(SOCK_ERROR,"[sockd] bind counter to ep\n");
SOCK_LOG_ERROR("[sockd] bind counter to ep\n");
cntr = container_of(bfid, struct sock_cntr, cntr_fid.fid);
if (!(flags &
(FI_WRITE | FI_READ | FI_SEND | FI_RECV))) {
sock_debug(SOCK_ERROR,"[sockd] Counter only support FI_WRITE | FI_READ | FI_SEND | FI_RECV\n");
SOCK_LOG_ERROR("[sockd] Counter only support FI_WRITE | FI_READ | FI_SEND | FI_RECV\n");
errno = FI_EINVAL;
return -errno;
}
@ -408,11 +420,11 @@ static int sockd_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
}
break;
case FI_CLASS_CQ:
sock_debug(SOCK_ERROR,"[sockd] bind CQ to ep\n");
SOCK_LOG_ERROR("[sockd] bind CQ to ep\n");
cq = container_of(bfid, struct sock_cq, cq_fid.fid);
if (!(flags &
(FI_SEND | FI_RECV))) {
sock_debug(SOCK_ERROR,"[sockd] CQ only support FI_SEND | FI_RECV\n");
SOCK_LOG_ERROR("[sockd] CQ only support FI_SEND | FI_RECV\n");
errno = FI_EINVAL;
return -errno;
}
@ -433,7 +445,7 @@ static int sockd_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
*/
break;
case FI_CLASS_EQ:
sock_debug(SOCK_ERROR,"[sockd] bind EQ to ep\n");
SOCK_LOG_ERROR("[sockd] bind EQ to ep\n");
/* FIXME: bind EQ to sockd EP */
eq = container_of(bfid, struct sock_eq, eq.fid);
if (ep->eq) {
@ -442,7 +454,7 @@ static int sockd_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
ep->eq = eq;
break;
case FI_CLASS_AV:
sock_debug(SOCK_ERROR,"[sockd] bind AV to ep\n");
SOCK_LOG_ERROR("[sockd] bind AV to ep\n");
av = container_of(bfid,
struct sock_av, av_fid.fid);
if (ep->domain != av->dom)
@ -456,12 +468,6 @@ static int sockd_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
return 0;
}
static int sockd_ep_sync(fid_t fid, uint64_t flags, void *context)
{
errno = FI_ENOSYS;
return -errno;
}
static int sockd_ep_control(fid_t fid, int command, void *arg)
{
errno = FI_ENOSYS;
@ -508,7 +514,7 @@ static int sockd_ep_setopt(fid_t fid, int level, int optname,
return -errno;
}
static int sockd_ep_tx_ctx(struct fid_ep *ep, int index,
static int sockd_ep_tx_ctx(struct fid_sep *sep, int index,
struct fi_tx_ctx_attr *attr, struct fid_ep **tx_ep,
void *context)
{
@ -517,7 +523,7 @@ static int sockd_ep_tx_ctx(struct fid_ep *ep, int index,
}
static int sockd_ep_rx_ctx(struct fid_ep *ep, int index,
static int sockd_ep_rx_ctx(struct fid_sep *sep, int index,
struct fi_rx_ctx_attr *attr, struct fid_ep **rx_ep,
void *context)
{
@ -588,20 +594,6 @@ static int sockd_cm_leave(struct fid_ep *ep, void *addr, fi_addr_t fi_addr,
/* sockd_ops_msg */
static ssize_t sockd_msg_recv(struct fid_ep *ep, void *buf, size_t len, void *desc,
void *context)
{
errno = FI_ENOSYS;
return -errno;
}
static ssize_t sockd_msg_recvv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, void *context)
{
errno = FI_ENOSYS;
return -errno;
}
static ssize_t sockd_msg_recvfrom(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr, void *context)
{
struct sock_ep *sock_ep;
@ -640,6 +632,13 @@ static ssize_t sockd_msg_recvfrom(struct fid_ep *ep, void *buf, size_t len, void
return 0;
}
static ssize_t sockd_msg_recvv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, fi_addr_t src_addr, void *context)
{
errno = FI_ENOSYS;
return -errno;
}
static ssize_t sockd_msg_recvmsg(struct fid_ep *ep, const struct fi_msg *msg,
uint64_t flags)
{
@ -648,20 +647,6 @@ static ssize_t sockd_msg_recvmsg(struct fid_ep *ep, const struct fi_msg *msg,
}
static ssize_t sockd_msg_send(struct fid_ep *ep, const void *buf, size_t len, void *desc,
void *context)
{
errno = FI_ENOSYS;
return -errno;
}
static ssize_t sockd_msg_sendv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, void *context)
{
errno = FI_ENOSYS;
return -errno;
}
static ssize_t sockd_msg_sendto(struct fid_ep *ep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, void *context)
{
struct sock_ep *sock_ep;
@ -700,6 +685,13 @@ static ssize_t sockd_msg_sendto(struct fid_ep *ep, const void *buf, size_t len,
return 0;
}
static ssize_t sockd_msg_sendv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, fi_addr_t dest_addr, void *context)
{
errno = FI_ENOSYS;
return -errno;
}
static ssize_t sockd_msg_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
uint64_t flags)
{
@ -707,28 +699,15 @@ static ssize_t sockd_msg_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
return -errno;
}
static ssize_t sockd_msg_inject(struct fid_ep *ep, const void *buf, size_t len)
{
errno = FI_ENOSYS;
return -errno;
}
static ssize_t sockd_msg_injectto(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr)
static ssize_t sockd_msg_inject(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr)
{
errno = FI_ENOSYS;
return -errno;
}
static ssize_t sockd_msg_senddata(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, void *context)
{
errno = FI_ENOSYS;
return -errno;
}
static ssize_t sockd_msg_senddatato(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, fi_addr_t dest_addr, void *context)
uint64_t data, fi_addr_t dest_addr, void *context)
{
errno = FI_ENOSYS;
return -errno;
@ -738,7 +717,6 @@ static struct fi_ops sockd_ep_fi_ops = {
.size = sizeof(struct fi_ops),
.close = sockd_ep_close,
.bind = sockd_ep_bind,
.sync = sockd_ep_sync,
.control = sockd_ep_control,
.ops_open = sockd_ep_ops_open
};
@ -770,26 +748,22 @@ static struct fi_ops_msg sockd_ops_msg = {
.size = sizeof(struct fi_ops_msg),
.recv = sockd_msg_recv,
.recvv = sockd_msg_recvv,
.recvfrom = sockd_msg_recvfrom,
.recvmsg = sockd_msg_recvmsg,
.send = sockd_msg_send,
.sendv = sockd_msg_sendv,
.sendto = sockd_msg_sendto,
.sendmsg = sockd_msg_sendmsg,
.inject = sockd_msg_inject,
.injectto = sockd_msg_injectto,
.senddata = sockd_msg_senddata,
.senddatato = sockd_msg_senddatato
};
static inline int _sock_ep_dgram_progress(struct sock_ep *ep, struct sock_cq *cq)
{
struct sock_req_item *item;
if((item = dequeue_item(ep->send_list))) {
sock_debug(SOCK_ERROR,"[ep_dgram_progress] found a send req\n");
SOCK_LOG_ERROR("[ep_dgram_progress] found a send req\n");
}
if((item = dequeue_item(ep->recv_list))) {
sock_debug(SOCK_ERROR,"[ep_dgram_progress] found a recv req\n");
SOCK_LOG_ERROR("[ep_dgram_progress] found a recv req\n");
}
return -FI_ENOSYS;
}
@ -797,7 +771,7 @@ static inline int _sock_ep_dgram_progress(struct sock_ep *ep, struct sock_cq *cq
int sock_dgram_ep(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **ep, void *context)
{
sock_debug(SOCK_ERROR,"[sockd] enter sock_dgram_ep\n");
SOCK_LOG_ERROR("[sockd] enter sock_dgram_ep\n");
struct sock_ep *_ep;
struct sock_domain *_dom;
struct sockaddr_in si_me;
@ -823,7 +797,7 @@ int sock_dgram_ep(struct fid_domain *domain, struct fi_info *info,
_ep->sock_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
if (_ep->sock_fd < 0) {
sock_debug(SOCK_ERROR,"%s: couldn't open DGRAM socket\n", __func__);
SOCK_LOG_ERROR("%s: couldn't open DGRAM socket\n", __func__);
errno = FI_ENODATA;
goto err1;
}
@ -832,16 +806,16 @@ int sock_dgram_ep(struct fid_domain *domain, struct fi_info *info,
si_me.sin_port = ((struct sockaddr_in *)(info->src_addr))->sin_port;
si_me.sin_addr.s_addr = htonl(INADDR_ANY);
if (bind(_ep->sock_fd, &si_me, sizeof(si_me)) == -1) {
sock_debug(SOCK_ERROR,"[sockd] %s: failed to bind sock_fd to port %d\n", __func__, ntohs(si_me.sin_port));
SOCK_LOG_ERROR("[sockd] %s: failed to bind sock_fd to port %d\n", __func__, ntohs(si_me.sin_port));
goto err2;
}
_ep->port_num = ntohs(si_me.sin_port);
if(!(_ep->send_list = new_list(SOCK_EP_SNDQ_LEN)))
if(!(_ep->send_list = new_list(SOCK_CQ_DEF_SZ)))
goto err2;
if(!(_ep->recv_list = new_list(SOCK_EP_RCVQ_LEN)))
if(!(_ep->recv_list = new_list(SOCK_CQ_DEF_SZ)))
goto err3;
/*

Просмотреть файл

@ -40,6 +40,76 @@
#include "sock.h"
#include "sock_util.h"
const struct fi_domain_attr sock_domain_attr = {
.name = NULL,
.threading = FI_THREAD_SAFE,
.control_progress = FI_PROGRESS_AUTO,
.data_progress = FI_PROGRESS_AUTO,
.mr_key_size = 0,
.cq_data_size = sizeof(uint64_t),
.ep_cnt = SOCK_EP_MAX_EP_CNT,
.tx_ctx_cnt = 0,
.rx_ctx_cnt = 0,
.max_ep_tx_ctx = SOCK_EP_MAX_TX_CNT,
.max_ep_rx_ctx = SOCK_EP_MAX_RX_CNT,
};
int sock_verify_domain_attr(struct fi_domain_attr *attr)
{
if(!attr)
return 0;
if(attr->name){
if (strcmp(attr->name, sock_dom_name))
return -FI_ENODATA;
}
switch(attr->threading){
case FI_THREAD_UNSPEC:
case FI_THREAD_SAFE:
case FI_THREAD_PROGRESS:
break;
default:
SOCK_LOG_INFO("Invalid threading model!\n");
return -FI_ENODATA;
}
switch (attr->control_progress){
case FI_PROGRESS_UNSPEC:
case FI_PROGRESS_AUTO:
break;
case FI_PROGRESS_MANUAL:
default:
SOCK_LOG_INFO("Control progress mode not supported!\n");
return -FI_ENODATA;
}
switch (attr->data_progress){
case FI_PROGRESS_UNSPEC:
case FI_PROGRESS_AUTO:
break;
case FI_PROGRESS_MANUAL:
default:
SOCK_LOG_INFO("Data progress mode not supported!\n");
return -FI_ENODATA;
}
if(attr->cq_data_size > sock_domain_attr.cq_data_size)
return -FI_ENODATA;
if(attr->ep_cnt > sock_domain_attr.ep_cnt)
return -FI_ENODATA;
if(attr->max_ep_tx_ctx > sock_domain_attr.max_ep_tx_ctx)
return -FI_ENODATA;
if(attr->max_ep_rx_ctx > sock_domain_attr.max_ep_rx_ctx)
return -FI_ENODATA;
return 0;
}
static int sock_dom_close(struct fid *fid)
{
@ -54,13 +124,6 @@ static int sock_dom_close(struct fid *fid)
return 0;
}
//static int sock_dom_query(struct fid_domain *domain, struct fi_domain_attr *attr)
//{
// attr->mr_key_size = 2; /* IDX_MAX_INDEX bits */
// attr->eq_data_size = sizeof(uint64_t);
// return 0;
//}
static uint16_t sock_get_mr_key(struct sock_domain *dom)
{
uint16_t i;
@ -91,20 +154,50 @@ static struct fi_ops sock_mr_fi_ops = {
.size = sizeof(struct fi_ops),
.close = sock_mr_close,
.bind = fi_no_bind,
.sync = fi_no_sync,
.control = fi_no_control,
.ops_open = fi_no_ops_open,
};
int sock_mr_verify_key(struct sock_domain *domain, uint16_t key,
void *buf, size_t len, uint64_t access)
{
int i;
struct sock_mr *mr;
mr = idm_lookup(&domain->mr_idm, key);
if (!mr)
return -FI_EINVAL;
for (i = 0; i < mr->iov_count; i++) {
if ((uintptr_t)buf >= (uintptr_t)mr->mr_iov[i].iov_base &&
((uintptr_t)buf + len <= (uintptr_t) mr->mr_iov[i].iov_base +
mr->mr_iov[i].iov_len)) {
if ((access & mr->access) == access)
return 0;
}
}
SOCK_LOG_ERROR("MR check failed\n");
return -FI_EINVAL;
}
int sock_mr_verify_desc(struct sock_domain *domain, void *desc,
void *buf, size_t len, uint64_t access)
{
uint64_t key = (uint64_t)desc;
return sock_mr_verify_key(domain, key, buf, len, access);
}
static int sock_regattr(struct fid_domain *domain, const struct fi_mr_attr *attr,
uint64_t flags, struct fid_mr **mr)
{
struct fi_eq_entry eq_entry;
struct sock_domain *dom;
struct sock_mr *_mr;
uint16_t key;
uint64_t key;
dom = container_of(domain, struct sock_domain, dom_fid);
if (!(dom->mode & FI_PROV_MR_KEY) && ((attr->requested_key > IDX_MAX_INDEX) ||
if (!(dom->info.mode & FI_PROV_MR_ATTR) &&
((attr->requested_key > IDX_MAX_INDEX) ||
idm_lookup(&dom->mr_idm, (int) attr->requested_key)))
return -FI_ENOKEY;
@ -123,18 +216,26 @@ static int sock_regattr(struct fid_domain *domain, const struct fi_mr_attr *attr
attr->offset : (uintptr_t) attr->mr_iov[0].iov_base;
fastlock_acquire(&dom->lock);
key = (dom->mode & FI_PROV_MR_KEY) ?
key = (dom->info.mode & FI_PROV_MR_ATTR) ?
sock_get_mr_key(dom) : (uint16_t) attr->requested_key;
if (idm_set(&dom->mr_idm, key, _mr) < 0)
goto err;
_mr->mr_fid.key = key;
_mr->mr_fid.mem_desc = (void *)key;
fastlock_release(&dom->lock);
_mr->iov_count = attr->iov_count;
memcpy(&_mr->mr_iov, attr->mr_iov, sizeof(_mr->mr_iov) * attr->iov_count);
*mr = &_mr->mr_fid;
/* TODO: async */
if (dom->mr_eq) {
eq_entry.fid = &domain->fid;
eq_entry.context = attr->context;
return sock_eq_report_event(dom->mr_eq, FI_COMPLETE,
&eq_entry, sizeof(eq_entry), 0);
}
return 0;
err:
@ -174,23 +275,20 @@ static int sock_reg(struct fid_domain *domain, const void *buf, size_t len,
int sock_dom_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
{
return -FI_ENOSYS;
}
struct sock_domain *dom;
struct sock_eq *eq;
int sock_dom_sync(struct fid *fid, uint64_t flags, void *context)
{
return -FI_ENOSYS;
}
dom = container_of(fid, struct sock_domain, dom_fid.fid);
eq = container_of(bfid, struct sock_eq, eq.fid);
int sock_dom_control(struct fid *fid, int command, void *arg)
{
return -FI_ENOSYS;
}
if (dom->eq)
return -FI_EINVAL;
int sock_dom_ops_open(struct fid *fid, const char *name,
uint64_t flags, void **ops, void *context)
{
return -FI_ENOSYS;
dom->eq = eq;
if (flags & FI_REG_MR)
dom->mr_eq = eq;
return 0;
}
int sock_endpoint(struct fid_domain *domain, struct fi_info *info,
@ -209,8 +307,7 @@ int sock_endpoint(struct fid_domain *domain, struct fi_info *info,
static struct fi_ops sock_dom_fi_ops = {
.size = sizeof(struct fi_ops),
.close = sock_dom_close,
.bind = fi_no_bind,
.sync = fi_no_sync,
.bind = sock_dom_bind,
.control = fi_no_control,
.ops_open = fi_no_ops_open,
};
@ -245,7 +342,7 @@ int _sock_verify_domain_attr(struct fi_domain_attr *attr)
case FI_THREAD_PROGRESS:
break;
default:
sock_debug(SOCK_INFO, "Invalid threading model!\n");
SOCK_LOG_INFO("Invalid threading model!\n");
return -FI_ENODATA;
}
@ -256,7 +353,7 @@ int _sock_verify_domain_attr(struct fi_domain_attr *attr)
case FI_PROGRESS_MANUAL:
default:
sock_debug(SOCK_INFO, "Control progress mode not supported!\n");
SOCK_LOG_INFO("Control progress mode not supported!\n");
return -FI_ENODATA;
}
@ -267,14 +364,14 @@ int _sock_verify_domain_attr(struct fi_domain_attr *attr)
case FI_PROGRESS_MANUAL:
default:
sock_debug(SOCK_INFO, "Data progress mode not supported!\n");
SOCK_LOG_INFO("Data progress mode not supported!\n");
return -FI_ENODATA;
}
if(attr->max_ep_tx_ctx > SOCK_EP_TX_CTX_CNT)
if(attr->max_ep_tx_ctx > SOCK_EP_MAX_TX_CNT)
return -FI_ENODATA;
if(attr->max_ep_rx_ctx > SOCK_EP_RX_CTX_CNT)
if(attr->max_ep_rx_ctx > SOCK_EP_MAX_RX_CNT)
return -FI_ENODATA;
return 0;

Просмотреть файл

@ -120,7 +120,7 @@ int sock_ep_listen(struct fid_pep *pep)
struct sock_pep *sock_pep;
sock_pep = container_of(pep, struct sock_pep, pep);
ret = listen(sock_pep->sock_fd, SOCK_EP_BACKLOG);
ret = listen(sock_pep->sock_fd, 0);
if(ret)
return -errno;
return 0;
@ -169,7 +169,7 @@ struct fi_ops_cm sock_cm_ops = {
};
ssize_t sock_ep_recv(struct fid_ep *ep, void *buf, size_t len, void *desc,
void *context)
fi_addr_t src_addr, void *context)
{
/*
struct sock_ep *sock_ep;
@ -188,13 +188,7 @@ ssize_t sock_ep_recv(struct fid_ep *ep, void *buf, size_t len, void *desc,
}
ssize_t sock_ep_recvv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, void *context)
{
return 0;
}
ssize_t sock_ep_recvfrom(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr, void *context)
size_t count, fi_addr_t src_addr, void *context)
{
return 0;
}
@ -206,19 +200,13 @@ ssize_t sock_ep_recvmsg(struct fid_ep *ep, const struct fi_msg *msg,
}
ssize_t sock_ep_send(struct fid_ep *ep, const void *buf, size_t len, void *desc,
void *context)
fi_addr_t dest_addr, void *context)
{
return 0;
}
ssize_t sock_ep_sendv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, void *context)
{
return 0;
}
ssize_t sock_ep_sendto(struct fid_ep *ep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, void *context)
size_t count, fi_addr_t dest_addr, void *context)
{
return 0;
}
@ -229,25 +217,14 @@ ssize_t sock_ep_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
return 0;
}
ssize_t sock_ep_inject(struct fid_ep *ep, const void *buf, size_t len)
ssize_t sock_ep_inject(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr)
{
return 0;
}
ssize_t sock_ep_injectto(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr)
{
return 0;
}
ssize_t sock_ep_senddata(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, void *context)
{
return 0;
}
ssize_t sock_ep_senddatato(struct fid_ep *ep, const void *buf, size_t len,
void *desc, uint64_t data, fi_addr_t dest_addr, void *context)
ssize_t sock_ep_senddata(struct fid_ep *ep, const void *buf, size_t len,
void *desc, uint64_t data, fi_addr_t dest_addr, void *context)
{
return 0;
}
@ -262,15 +239,11 @@ struct fi_ops_msg sock_msg_ops = {
.size = sizeof(struct fi_ops_msg),
.recv = NULL,
.recvv = NULL,
.recvfrom = NULL,
.recvmsg = NULL,
.send = NULL,
.sendv = NULL,
.sendto = NULL,
.sendmsg = NULL,
.inject = NULL,
.injectto = NULL,
.senddata = NULL,
.senddatato = NULL,
};

Просмотреть файл

@ -98,7 +98,7 @@ ssize_t sock_eq_read(struct fid_eq *eq, uint32_t *event, void *buf, size_t len,
}
ssize_t sock_eq_readerr(struct fid_eq *eq, struct fi_eq_err_entry *buf,
size_t len, uint64_t flags)
uint64_t flags)
{
int ret;
struct sock_eq *sock_eq;
@ -116,11 +116,6 @@ ssize_t sock_eq_readerr(struct fid_eq *eq, struct fi_eq_err_entry *buf,
list = sock_eq->err_list.list.next;
entry = container_of(list, struct sock_eq_entry, entry);
if(entry->len > len) {
ret = -FI_ETOOSMALL;
goto out;
}
ret = entry->len;
memcpy(buf, entry->event, entry->len);
@ -191,7 +186,7 @@ static ssize_t sock_eq_write(struct fid_eq *eq, uint32_t event,
}
const char * sock_eq_strerror(struct fid_eq *eq, int prov_errno,
const void *err_data, void *buf, size_t len)
const void *err_data, char *buf, size_t len)
{
if (buf && len)
return strncpy(buf, strerror(prov_errno), len);
@ -244,7 +239,6 @@ static struct fi_ops sock_eq_fi_ops = {
.size = sizeof(struct fi_ops),
.close = sock_eq_fi_close,
.bind = fi_no_bind,
.sync = fi_no_sync,
.control = sock_eq_fi_control,
.ops_open = fi_no_ops_open,
};
@ -269,7 +263,7 @@ static int _sock_eq_verify_attr(struct fi_eq_attr *attr)
}
static struct fi_eq_attr _sock_eq_def_attr ={
.size = SOCK_EQ_DEF_LEN,
.size = SOCK_EQ_DEF_SZ,
.flags = 0,
.wait_obj = FI_WAIT_FD,
.signaling_vector = 0,

Просмотреть файл

@ -43,14 +43,24 @@
const char const sock_fab_name[] = "IP";
const char const sock_dom_name[] = "sockets";
int _sock_verify_fabric_attr(struct fi_fabric_attr *attr)
const struct fi_fabric_attr sock_fabric_attr = {
.fabric = NULL,
.name = NULL,
.prov_name = NULL,
.prov_version = FI_VERSION(SOCK_MAJOR_VERSION, SOCK_MINOR_VERSION),
};
int sock_verify_fabric_attr(struct fi_fabric_attr *attr)
{
if (!attr)
return 0;
if (attr->name &&
strcmp(attr->name, sock_fab_name))
return -FI_ENODATA;
if(attr->prov_version){
if(attr->prov_version !=
if (attr->prov_version) {
if (attr->prov_version !=
FI_VERSION(SOCK_MAJOR_VERSION, SOCK_MINOR_VERSION))
return -FI_ENODATA;
}
@ -58,6 +68,46 @@ int _sock_verify_fabric_attr(struct fi_fabric_attr *attr)
return 0;
}
int sock_verify_info(struct fi_info *hints)
{
int ret;
if (!hints)
return 0;
switch (hints->ep_type) {
case FI_EP_UNSPEC:
case FI_EP_MSG:
case FI_EP_DGRAM:
case FI_EP_RDM:
break;
default:
return -FI_ENODATA;
}
switch (hints->addr_format) {
case FI_ADDR_UNSPEC:
case FI_SOCKADDR:
case FI_SOCKADDR_IN:
break;
default:
return -FI_ENODATA;
}
if (!sock_rdm_verify_ep_attr(hints->ep_attr,
hints->tx_attr, hints->rx_attr))
return 0;
ret = sock_verify_domain_attr(hints->domain_attr);
if (ret)
return ret;
ret = sock_verify_fabric_attr(hints->fabric_attr);
if (ret)
return ret;
return 0;
}
static struct fi_ops_fabric sock_fab_ops = {
.size = sizeof(struct fi_ops_fabric),
.domain = sock_domain,
@ -70,7 +120,7 @@ static int sock_fabric_close(fid_t fid)
struct sock_fabric *fab;
fab = container_of(fid, struct sock_fabric, fab_fid);
if(atomic_get(&fab->ref)) {
if (atomic_get(&fab->ref)) {
return -FI_EBUSY;
}
@ -83,11 +133,6 @@ int sock_fabric_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
return -FI_ENOSYS;
}
int sock_fabric_sync(struct fid *fid, uint64_t flags, void *context)
{
return -FI_ENOSYS;
}
int sock_fabric_control(struct fid *fid, int command, void *arg)
{
return -FI_ENOSYS;
@ -103,7 +148,6 @@ static struct fi_ops sock_fab_fi_ops = {
.size = sizeof(struct fi_ops),
.close = sock_fabric_close,
.bind = sock_fabric_bind,
.sync = sock_fabric_sync,
.control = sock_fabric_control,
.ops_open = sock_fabric_ops_open,
};
@ -137,6 +181,10 @@ static int sock_getinfo(uint32_t version, const char *node, const char *service,
return -FI_ENODATA;
ret = sock_verify_info(hints);
if (ret)
return ret;
if (hints) {
switch (hints->ep_type) {
case FI_EP_RDM:
@ -153,19 +201,19 @@ static int sock_getinfo(uint32_t version, const char *node, const char *service,
ret = sock_rdm_getinfo(version, node, service, flags,
hints, &_info);
if(ret == 0){
if (ret == 0) {
*info = tmp = _info;
while(tmp->next != NULL)
tmp=tmp->next;
}else if (-FI_ENODATA == ret){
} else if (ret == -FI_ENODATA) {
tmp = NULL;
}else
} else
return ret;
ret = sock_dgram_getinfo(version, node, service, flags,
hints, &_info);
if(NULL != tmp){
if (NULL != tmp) {
tmp->next = _info;
return ret;
}
@ -174,35 +222,20 @@ static int sock_getinfo(uint32_t version, const char *node, const char *service,
return ret;
}
int sock_freeinfo(struct fi_info *info)
{
if(info)
free(info);
return 0;
}
int sock_free_info(struct fi_info *info)
{
free_fi_info(info);
return 0;
}
struct fi_provider sock_prov = {
.name = "IP",
.version = FI_VERSION(SOCK_MAJOR_VERSION, SOCK_MINOR_VERSION),
.getinfo = sock_getinfo,
.freeinfo = sock_free_info,
.fabric = sock_fabric,
};
static void __attribute__((constructor)) sock_ini(void)
{
char *tmp = getenv("SFI_SOCK_DEBUG_LEVEL");
if (tmp){
sock_debug_level = atoi(tmp);
}else{
sock_debug_level = SOCK_ERROR;
if (tmp) {
sock_log_level = atoi(tmp);
} else {
sock_log_level = SOCK_ERROR;
}
(void) fi_register(&sock_prov);

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,111 +0,0 @@
/*
* Copyright (c) 2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenFabrics.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <stdlib.h>
#include <string.h>
#include "sock.h"
struct sock_rxtx *sock_rxtx_alloc(size_t size)
{
struct sock_rxtx *rxtx;
rxtx = calloc(sizeof(*rxtx), 1);
if (!rxtx)
return NULL;
if (rbfdinit(&rxtx->rbfd, size))
goto err;
fastlock_init(&rxtx->rlock);
fastlock_init(&rxtx->wlock);
return rxtx;
err:
free(rxtx);
return NULL;
}
void sock_rxtx_free(struct sock_rxtx *rxtx)
{
fastlock_destroy(&rxtx->rlock);
fastlock_destroy(&rxtx->wlock);
rbfdfree(&rxtx->rbfd);
free(rxtx);
}
void sock_rxtx_start(struct sock_rxtx *rxtx)
{
fastlock_acquire(&rxtx->wlock);
}
int sock_rxtx_write(struct sock_rxtx *rxtx, const void *buf, size_t len)
{
if (rbfdavail(&rxtx->rbfd) < len)
return -FI_EAGAIN;
rbfdwrite(&rxtx->rbfd, buf, len);
return 0;
}
void sock_rxtx_commit(struct sock_rxtx *rxtx)
{
rbfdcommit(&rxtx->rbfd);
fastlock_release(&rxtx->rlock);
}
void sock_rxtx_abort(struct sock_rxtx *rxtx)
{
rbfdabort(&rxtx->rbfd);
fastlock_release(&rxtx->rlock);
}
int sock_rxtx_read(struct sock_rxtx *rxtx, void *buf, size_t len)
{
int ret;
fastlock_acquire(&rxtx->rlock);
if (rbfdused(&rxtx->rbfd) >= len) {
rbfdread(&rxtx->rbfd, buf, len);
ret = 0;
} else {
ret = -FI_EAGAIN;
}
fastlock_release(&rxtx->rlock);
return ret;
}

Просмотреть файл

@ -54,27 +54,4 @@
#include "sock.h"
#include "sock_util.h"
int sock_debug_level = SOCK_ERROR;
void sock_debug(int level, char *fmt, ...)
{
va_list ap;
if(level >= sock_debug_level){
switch(level){
case SOCK_INFO:
fprintf(stderr, "SOCK_INFO: ");
break;
case SOCK_WARN:
fprintf(stderr, "SOCK_WARN: ");
break;
case SOCK_ERROR:
default:
fprintf(stderr, "SOCK_ERROR: ");
break;
}
va_start(ap, fmt);
vfprintf(stderr, fmt, ap);
va_end(ap);
}
}
int sock_log_level = SOCK_ERROR;

Просмотреть файл

@ -37,12 +37,33 @@
#ifndef _SOCK_UTIL_H_
#define _SOCK_UTIL_H_
#include <stdio.h>
#define SOCK_ERROR (1)
#define SOCK_WARN (2)
#define SOCK_INFO (3)
extern int sock_debug_level;
extern int sock_log_level;
void sock_debug(int level, char *fmt, ...);
#define SOCK_LOG_INFO(...) do { \
if (sock_log_level <= SOCK_INFO) { \
fprintf(stderr, "[SOCK_INFO - %s]: ", __func__); \
fprintf(stderr, __VA_ARGS__); \
} \
} while (0)
#define SOCK_LOG_WARN(...) do { \
if (sock_log_level <= SOCK_WARN) { \
fprintf(stderr, "[SOCK_WARN - %s]: ", __func__); \
fprintf(stderr, __VA_ARGS__); \
} \
} while (0)
#define SOCK_LOG_ERROR(...) do { \
if (sock_log_level <= SOCK_ERROR) { \
fprintf(stderr, "[SOCK_ERROR - %s]: ", __func__); \
fprintf(stderr, __VA_ARGS__); \
} \
} while (0)
#endif

Просмотреть файл

@ -216,7 +216,8 @@ int usdf_pep_open(struct fid_fabric *fabric, struct fi_info *info,
struct fid_pep **pep_p, void *context);
/* fi_ops_domain */
int usdf_cq_open();
int usdf_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
struct fid_cq **cq_o, void *context);
int usdf_endpoint_open(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **ep, void *context);
int usdf_av_open(struct fid_domain *domain, struct fi_av_attr *attr,

Просмотреть файл

@ -58,6 +58,7 @@
#include "libnl_utils.h"
#include "usd.h"
#include "usd_queue.h"
#include "usd_dest.h"
#include "usdf.h"
#include "usdf_av.h"
@ -121,7 +122,7 @@ usdf_post_insert_request_error(struct usdf_av_insert *insert,
/*
* Called by progression thread to look for AV completions on this domain
*/
void
static void
usdf_av_insert_progress(void *v)
{
int ret;
@ -273,9 +274,9 @@ usdf_am_insert_async(struct fid_av *fav, const void *addr, size_t count,
}
} else {
if (req->avr_daddr_be == 0) {
req->avr_daddr_be = sin->sin_addr.s_addr;
}
if (req->avr_daddr_be == 0) {
req->avr_daddr_be = sin->sin_addr.s_addr;
}
req->avr_dest = calloc(1, sizeof(*req->avr_dest));
if (req->avr_dest == NULL) {
ret = -FI_ENOMEM;
@ -491,7 +492,6 @@ static struct fi_ops usdf_av_fi_ops = {
.close = usdf_av_close,
.bind = usdf_av_bind,
.control = fi_no_control,
.sync = fi_no_sync,
.ops_open = usdf_av_ops_open,
};

Просмотреть файл

@ -76,8 +76,4 @@ struct usdf_av {
#define av_fidtou(FID) container_of(FID, struct usdf_av, av_fid.fid)
#define av_utof(AV) (&(AV)->av_fid)
/* USD routines we use */
void usd_fill_udp_dest(struct usd_dest *dest, struct usd_device_attrs *dap,
uint32_t daddr_be, uint16_t dport_be);
#endif /* _USDF_AV_H_ */

Просмотреть файл

@ -58,6 +58,22 @@
#include "usnic_direct.h"
#include "usdf.h"
#include "usdf_dgram.h"
#include "usdf_cm.h"
#include "usdf_msg.h"
static struct fi_ops_msg usdf_dgram_conn_ops = {
.size = sizeof(struct fi_ops_msg),
.recv = usdf_dgram_recv,
.recvv = usdf_dgram_recvv,
.recvmsg = usdf_dgram_recvmsg,
.send = usdf_dgram_conn_send,
.sendv = usdf_dgram_sendv,
.sendmsg = usdf_dgram_sendmsg,
.inject = usdf_dgram_inject,
.senddata = usdf_dgram_senddata,
};
int
usdf_cm_dgram_connect(struct fid_ep *fep, const void *addr,
@ -72,6 +88,9 @@ usdf_cm_dgram_connect(struct fid_ep *fep, const void *addr,
ret = usd_create_dest(ep->ep_domain->dom_dev, sin->sin_addr.s_addr,
sin->sin_port, &ep->ep_dest);
if (!ret) {
ep->ep_fid.msg = &usdf_dgram_conn_ops;
}
return ret;
}

Просмотреть файл

@ -65,7 +65,7 @@
static ssize_t
usdf_cq_readerr(struct fid_cq *fcq, struct fi_cq_err_entry *entry,
size_t len, uint64_t flags)
uint64_t flags)
{
struct usdf_cq *cq;
@ -76,10 +76,6 @@ usdf_cq_readerr(struct fid_cq *fcq, struct fi_cq_err_entry *entry,
return 0;
}
if (len < sizeof(*entry)) {
return -FI_ETOOSMALL;
}
entry->op_context = cq->cq_comp.uc_context;
entry->flags = 0;
entry->err = FI_EIO;
@ -291,10 +287,10 @@ usdf_cq_read_data(struct fid_cq *fcq, void *buf, size_t count)
static const char *
usdf_cq_strerror(struct fid_cq *eq, int prov_errno, const void *err_data,
void *buf, size_t len)
char *buf, size_t len)
{
strncpy(buf, "CQ Error", len-1);
((char *)buf)[len-1] = '\0';
buf[len-1] = '\0';
return buf;
}
@ -371,8 +367,7 @@ usdf_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
cq->cq_domain = container_of(domain, struct usdf_domain, dom_fid);
ret = usd_create_cq(cq->cq_domain->dom_dev, attr->size, USD_CQ_NO_GROUP,
-1, &cq->cq_cq);
ret = usd_create_cq(cq->cq_domain->dom_dev, attr->size, -1, &cq->cq_cq);
if (ret != 0) {
goto fail;
}

Просмотреть файл

@ -61,10 +61,11 @@
#include "usd.h"
#include "usd_post.h"
#include "usdf.h"
#include "usdf_dgram.h"
ssize_t
usdf_dgram_recv(struct fid_ep *fep, void *buf, size_t len,
void *desc, void *context)
void *desc, fi_addr_t src_addr, void *context)
{
struct usdf_ep *ep;
struct usd_qp_impl *qp;
@ -94,7 +95,7 @@ usdf_dgram_recv(struct fid_ep *fep, void *buf, size_t len,
ssize_t
usdf_dgram_recvv(struct fid_ep *fep, const struct iovec *iov, void **desc,
size_t count, void *context)
size_t count, fi_addr_t src_addr, void *context)
{
struct usdf_ep *ep;
struct usd_recv_desc rxd;
@ -122,15 +123,8 @@ usdf_dgram_recvv(struct fid_ep *fep, const struct iovec *iov, void **desc,
return usd_post_recv(ep->ep_qp, &rxd);
}
ssize_t
usdf_dgram_recvfrom(struct fid_ep *fep, void *buf, size_t len, void *desc,
fi_addr_t src_addr, void *context)
{
return -FI_ENOSYS;
}
static inline ssize_t
_usdf_dgram_sendto(struct usdf_ep *ep, struct usd_dest *dest,
_usdf_dgram_send(struct usdf_ep *ep, struct usd_dest *dest,
const void *buf, size_t len, void *context)
{
if (len <= USD_SEND_MAX_COPY - sizeof(struct usd_udp_hdr)) {
@ -143,50 +137,40 @@ _usdf_dgram_sendto(struct usdf_ep *ep, struct usd_dest *dest,
}
ssize_t
usdf_dgram_sendto(struct fid_ep *fep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, void *context)
usdf_dgram_send(struct fid_ep *fep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, void *context)
{
struct usdf_ep *ep;
struct usd_dest *dest;
int ret;
ep = ep_ftou(fep);
dest = (struct usd_dest *)(uintptr_t)dest_addr;
return _usdf_dgram_sendto(ep, dest, buf, len, context);
return ret;
dest = (struct usd_dest *)(uintptr_t) dest_addr;
return _usdf_dgram_send(ep, dest, buf, len, context);
}
ssize_t
usdf_dgram_send(struct fid_ep *fep, const void *buf, size_t len,
void *desc, void *context)
usdf_dgram_conn_send(struct fid_ep *fep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, void *context)
{
struct usdf_ep *ep;
struct usd_dest *dest;
int ret;
ep = ep_ftou(fep);
dest = ep->ep_dest;
if (dest == NULL) {
return -FI_ENOTCONN;
}
return _usdf_dgram_sendto(ep, dest, buf, len, context);
return ret;
return _usdf_dgram_send(ep, ep->ep_dest, buf, len, context);
}
ssize_t
usdf_dgram_senddata(struct fid_ep *ep, const void *buf, size_t len,
void *desc, uint64_t data, void *context)
void *desc, uint64_t data, fi_addr_t dest_addr,
void *context)
{
return -FI_ENOSYS;
}
ssize_t
usdf_dgram_sendv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, void *context)
size_t count, fi_addr_t dest_addr, void *context)
{
return -FI_ENOSYS;
}
@ -198,22 +182,8 @@ usdf_dgram_sendmsg(struct fid_ep *ep, const struct fi_msg *msg, uint64_t flags)
}
ssize_t
usdf_dgram_inject(struct fid_ep *ep, const void *buf, size_t len)
{
return -FI_ENOSYS;
}
ssize_t
usdf_dgram_injectto(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr)
{
return -FI_ENOSYS;
}
ssize_t
usdf_dgram_senddatato(struct fid_ep *ep, const void *buf, size_t len,
void *desc, uint64_t data, fi_addr_t dest_addr,
void *context)
usdf_dgram_inject(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr)
{
return -FI_ENOSYS;
}
@ -229,7 +199,7 @@ usdf_dgram_recvmsg(struct fid_ep *ep, const struct fi_msg *msg, uint64_t flags)
*/
ssize_t
usdf_dgram_prefix_recv(struct fid_ep *fep, void *buf, size_t len,
void *desc, void *context)
void *desc, fi_addr_t src_addr, void *context)
{
struct usdf_ep *ep;
struct usd_qp_impl *qp;
@ -254,7 +224,7 @@ usdf_dgram_prefix_recv(struct fid_ep *fep, void *buf, size_t len,
ssize_t
usdf_dgram_prefix_recvv(struct fid_ep *fep, const struct iovec *iov,
void **desc, size_t count, void *context)
void **desc, size_t count, fi_addr_t src_addr, void *context)
{
struct usdf_ep *ep;
struct usd_recv_desc rxd;
@ -283,7 +253,7 @@ usdf_dgram_prefix_recvv(struct fid_ep *fep, const struct iovec *iov,
}
ssize_t
usdf_dgram_prefix_sendto(struct fid_ep *fep, const void *buf, size_t len,
usdf_dgram_prefix_send(struct fid_ep *fep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, void *context)
{
struct usdf_ep *ep;

Просмотреть файл

@ -43,34 +43,29 @@ int usdf_cm_dgram_shutdown(struct fid_ep *ep, uint64_t flags);
/* fi_ops_msg for DGRAM */
ssize_t usdf_dgram_recv(struct fid_ep *ep, void *buf, size_t len, void *desc,
void *context);
fi_addr_t src_addr, void *context);
ssize_t usdf_dgram_recvv(struct fid_ep *ep, const struct iovec *iov,
void **desc, size_t count, void *context);
ssize_t usdf_dgram_recvfrom(struct fid_ep *ep, void *buf, size_t len,
void *desc, fi_addr_t src_addr, void *context);
void **desc, size_t count, fi_addr_t src_addr, void *context);
ssize_t usdf_dgram_recvmsg(struct fid_ep *ep, const struct fi_msg *msg,
uint64_t flags);
ssize_t usdf_dgram_send(struct fid_ep *ep, const void *buf, size_t len,
void *desc, void *context);
ssize_t usdf_dgram_sendv(struct fid_ep *ep, const struct iovec *iov,
void **desc, size_t count, void *context);
ssize_t usdf_dgram_sendto(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, void *context);
ssize_t usdf_dgram_conn_send(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, void *context);
ssize_t usdf_dgram_sendv(struct fid_ep *ep, const struct iovec *iov,
void **desc, size_t count, fi_addr_t dest_addr, void *context);
ssize_t usdf_dgram_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
uint64_t flags);
ssize_t usdf_dgram_inject(struct fid_ep *ep, const void *buf, size_t len);
ssize_t usdf_dgram_injectto(struct fid_ep *ep, const void *buf, size_t len,
ssize_t usdf_dgram_inject(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr);
ssize_t usdf_dgram_senddata(struct fid_ep *ep, const void *buf, size_t len,
void *desc, uint64_t data, void *context);
ssize_t usdf_dgram_senddatato(struct fid_ep *ep, const void *buf, size_t len,
void *desc, uint64_t data, fi_addr_t dest_addr, void *context);
ssize_t usdf_dgram_prefix_recv(struct fid_ep *ep, void *buf, size_t len,
void *desc, void *context);
void *desc, fi_addr_t src_addr, void *context);
ssize_t usdf_dgram_prefix_recvv(struct fid_ep *ep, const struct iovec *iov,
void **desc, size_t count, void *context);
ssize_t usdf_dgram_prefix_sendto(struct fid_ep *ep, const void *buf, size_t len,
void **desc, size_t count, fi_addr_t src_addr, void *context);
ssize_t usdf_dgram_prefix_send(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, void *context);
#endif /* _USDF_DGRAM_H_ */

Просмотреть файл

@ -109,7 +109,6 @@ static struct fi_ops usdf_fid_ops = {
.size = sizeof(struct fi_ops),
.close = usdf_domain_close,
.bind = usdf_domain_bind,
.sync = fi_no_sync,
.ops_open = fi_no_ops_open,
};

Просмотреть файл

@ -139,7 +139,6 @@ struct fi_ops usdf_ep_ops = {
.size = sizeof(struct fi_ops),
.close = usdf_ep_close,
.bind = usdf_ep_bind,
.sync = fi_no_sync,
.control = fi_no_control,
.ops_open = fi_no_ops_open
};

Просмотреть файл

@ -152,32 +152,24 @@ static struct fi_ops_msg usdf_dgram_ops = {
.size = sizeof(struct fi_ops_msg),
.recv = usdf_dgram_recv,
.recvv = usdf_dgram_recvv,
.recvfrom = usdf_dgram_recvfrom,
.recvmsg = usdf_dgram_recvmsg,
.send = usdf_dgram_send,
.sendv = usdf_dgram_sendv,
.sendto = usdf_dgram_sendto,
.sendmsg = usdf_dgram_sendmsg,
.inject = usdf_dgram_inject,
.injectto = usdf_dgram_injectto,
.senddata = usdf_dgram_senddata,
.senddatato = usdf_dgram_senddatato
};
static struct fi_ops_msg usdf_dgram_prefix_ops = {
.size = sizeof(struct fi_ops_msg),
.recv = usdf_dgram_prefix_recv,
.recvv = usdf_dgram_prefix_recvv,
.recvfrom = usdf_dgram_recvfrom,
.recvmsg = usdf_dgram_recvmsg,
.send = usdf_dgram_send,
.sendv = usdf_dgram_sendv,
.sendto = usdf_dgram_prefix_sendto,
.sendmsg = usdf_dgram_sendmsg,
.inject = usdf_dgram_inject,
.injectto = usdf_dgram_injectto,
.senddata = usdf_dgram_senddata,
.senddatato = usdf_dgram_senddatato
};
static struct fi_ops_cm usdf_cm_dgram_ops = {

Просмотреть файл

@ -182,16 +182,12 @@ static struct fi_ops_msg usdf_msg_ops = {
.size = sizeof(struct fi_ops_msg),
.recv = usdf_msg_recv,
.recvv = usdf_msg_recvv,
.recvfrom = fi_no_msg_recvfrom,
.recvmsg = usdf_msg_recvmsg,
.send = usdf_msg_send,
.sendv = usdf_msg_sendv,
.sendto = fi_no_msg_sendto,
.sendmsg = usdf_msg_sendmsg,
.inject = usdf_msg_inject,
.injectto = fi_no_msg_injectto,
.senddata = usdf_msg_senddata,
.senddatato = fi_no_msg_senddatato
};
int

Просмотреть файл

@ -156,7 +156,7 @@ usdf_eq_write_event(struct usdf_eq *eq, uint32_t event,
}
static ssize_t
usdf_eq_readerr(struct fid_eq *feq, struct fi_eq_err_entry *entry, size_t len,
usdf_eq_readerr(struct fid_eq *feq, struct fi_eq_err_entry *entry,
uint64_t flags)
{
struct usdf_eq *eq;
@ -426,7 +426,7 @@ done:
static const char *
usdf_eq_strerror(struct fid_eq *feq, int prov_errno, const void *err_data,
void *buf, size_t len)
char *buf, size_t len)
{
return NULL;
}
@ -492,7 +492,6 @@ static struct fi_ops usdf_eq_fi_ops = {
.size = sizeof(struct fi_ops),
.close = usdf_eq_close,
.bind = fi_no_bind,
.sync = fi_no_sync,
.control = usdf_eq_control,
.ops_open = fi_no_ops_open,
};

Просмотреть файл

@ -71,13 +71,6 @@
struct usdf_usnic_info *__usdf_devinfo;
static int
usdf_freeinfo(struct fi_info *info)
{
fi_freeinfo_internal(info);
return 0;
}
static int
usdf_validate_hints(struct fi_info *hints, struct usd_device_attrs *dap)
{
@ -184,25 +177,6 @@ fail:
return ret; // fi_freeinfo() in caller frees all
}
static struct fi_info *
usdf_allocinfo(void)
{
struct fi_info *fi;
fi = fi_allocinfo_internal();
if (fi == NULL) {
goto fail;
}
return fi;
fail:
if (fi != NULL) {
fi_freeinfo_internal(fi);
}
return NULL;
}
static int
usdf_fill_info_dgram(
struct fi_info *hints,
@ -230,7 +204,7 @@ usdf_fill_info_dgram(
return -FI_ENODATA;
}
fi = usdf_allocinfo();
fi = fi_allocinfo_internal();
if (fi == NULL) {
ret = -FI_ENOMEM;
goto fail;
@ -307,7 +281,7 @@ usdf_fill_info_dgram(
fail:
if (fi != NULL) {
fi_freeinfo_internal(fi);
fi_freeinfo(fi);
}
return ret;
}
@ -339,7 +313,7 @@ usdf_fill_info_msg(
return -FI_ENODATA;
}
fi = usdf_allocinfo();
fi = fi_allocinfo_internal();
if (fi == NULL) {
ret = -FI_ENOMEM;
goto fail;
@ -414,13 +388,13 @@ usdf_fill_info_msg(
fail:
if (fi != NULL) {
fi_freeinfo_internal(fi);
fi_freeinfo(fi);
}
return ret;
}
static int
usdf_get_devinfo()
usdf_get_devinfo(void)
{
struct usdf_usnic_info *dp;
struct usdf_dev_entry *dep;
@ -462,7 +436,7 @@ fail:
return ret;
}
int
static int
usdf_get_distance(
struct usd_device_attrs *dap,
uint32_t daddr_be,
@ -494,7 +468,6 @@ usdf_getinfo(uint32_t version, const char *node, const char *service,
struct usd_device_attrs *dap;
struct fi_info *fi_first;
struct fi_info *fi_last;
struct fi_info *fi_next;
struct addrinfo *ai;
struct sockaddr_in *src;
struct sockaddr_in *dest;
@ -599,11 +572,7 @@ usdf_getinfo(uint32_t version, const char *node, const char *service,
fail:
if (ret != 0) {
while (fi_first != NULL) {
fi_next = fi_first->next;
fi_freeinfo_internal(fi_first);
fi_first = fi_next;
}
fi_freeinfo(fi_first);
}
if (ai != NULL) {
freeaddrinfo(ai);
@ -679,7 +648,6 @@ static struct fi_ops usdf_fi_ops = {
.size = sizeof(struct fi_ops),
.close = usdf_fabric_close,
.bind = fi_no_bind,
.sync = fi_no_sync,
.control = fi_no_control,
.ops_open = usdf_fabric_ops_open,
};
@ -691,7 +659,7 @@ static struct fi_ops_fabric usdf_ops_fabric = {
.eq_open = usdf_eq_open,
};
int
static int
usdf_fabric_open(struct fi_fabric_attr *fattrp, struct fid_fabric **fabric,
void *context)
{
@ -802,7 +770,6 @@ static struct fi_provider usdf_ops = {
.name = USDF_FI_NAME,
.version = FI_VERSION(0, 7),
.getinfo = usdf_getinfo,
.freeinfo = usdf_freeinfo,
.fabric = usdf_fabric_open,
};

Просмотреть файл

@ -63,7 +63,7 @@
ssize_t
usdf_msg_recv(struct fid_ep *fep, void *buf, size_t len,
void *desc, void *context)
void *desc, fi_addr_t src_addr, void *context)
{
struct usdf_ep *ep;
struct usd_qp_impl *qp;
@ -93,7 +93,7 @@ usdf_msg_recv(struct fid_ep *fep, void *buf, size_t len,
ssize_t
usdf_msg_recvv(struct fid_ep *fep, const struct iovec *iov, void **desc,
size_t count, void *context)
size_t count, fi_addr_t src_addr, void *context)
{
struct usdf_ep *ep;
struct usd_recv_desc rxd;
@ -121,16 +121,9 @@ usdf_msg_recvv(struct fid_ep *fep, const struct iovec *iov, void **desc,
return usd_post_recv(ep->ep_qp, &rxd);
}
ssize_t
usdf_msg_recvfrom(struct fid_ep *fep, void *buf, size_t len, void *desc,
fi_addr_t src_addr, void *context)
{
return -FI_ENOSYS;
}
static inline ssize_t
_usdf_msg_sendto(struct usdf_ep *ep, struct usd_dest *dest,
const void *buf, size_t len, void *context)
_usdf_msg_send(struct usdf_ep *ep, struct usd_dest *dest,
const void *buf, size_t len, fi_addr_t dest_addr, void *context)
{
if (len <= USD_SEND_MAX_COPY - sizeof(struct usd_udp_hdr)) {
return usd_post_send_one_copy(ep->ep_qp, dest, buf, len,
@ -142,8 +135,8 @@ _usdf_msg_sendto(struct usdf_ep *ep, struct usd_dest *dest,
}
ssize_t
usdf_msg_sendto(struct fid_ep *fep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, void *context)
usdf_msg_send(struct fid_ep *fep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, void *context)
{
struct usdf_ep *ep;
struct usd_dest *dest;
@ -152,40 +145,21 @@ usdf_msg_sendto(struct fid_ep *fep, const void *buf, size_t len, void *desc,
ep = ep_ftou(fep);
dest = (struct usd_dest *)(uintptr_t)dest_addr;
return _usdf_msg_sendto(ep, dest, buf, len, context);
return ret;
}
ssize_t
usdf_msg_send(struct fid_ep *fep, const void *buf, size_t len,
void *desc, void *context)
{
struct usdf_ep *ep;
struct usd_dest *dest;
int ret;
ep = ep_ftou(fep);
dest = ep->ep_dest;
if (dest == NULL) {
return -FI_ENOTCONN;
}
return _usdf_msg_sendto(ep, dest, buf, len, context);
return _usdf_msg_send(ep, dest, buf, len, dest_addr, context);
return ret;
}
ssize_t
usdf_msg_senddata(struct fid_ep *ep, const void *buf, size_t len,
void *desc, uint64_t data, void *context)
void *desc, uint64_t data, fi_addr_t dest_addr, void *context)
{
return -FI_ENOSYS;
}
ssize_t
usdf_msg_sendv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, void *context)
size_t count, fi_addr_t dest_addr, void *context)
{
return -FI_ENOSYS;
}
@ -197,22 +171,8 @@ usdf_msg_sendmsg(struct fid_ep *ep, const struct fi_msg *msg, uint64_t flags)
}
ssize_t
usdf_msg_inject(struct fid_ep *ep, const void *buf, size_t len)
{
return -FI_ENOSYS;
}
ssize_t
usdf_msg_injectto(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr)
{
return -FI_ENOSYS;
}
ssize_t
usdf_msg_senddatato(struct fid_ep *ep, const void *buf, size_t len,
void *desc, uint64_t data, fi_addr_t dest_addr,
void *context)
usdf_msg_inject(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr)
{
return -FI_ENOSYS;
}
@ -228,7 +188,7 @@ usdf_msg_recvmsg(struct fid_ep *ep, const struct fi_msg *msg, uint64_t flags)
*/
ssize_t
usdf_msg_prefix_recv(struct fid_ep *fep, void *buf, size_t len,
void *desc, void *context)
void *desc, fi_addr_t src_addr, void *context)
{
struct usdf_ep *ep;
struct usd_qp_impl *qp;
@ -253,7 +213,7 @@ usdf_msg_prefix_recv(struct fid_ep *fep, void *buf, size_t len,
ssize_t
usdf_msg_prefix_recvv(struct fid_ep *fep, const struct iovec *iov,
void **desc, size_t count, void *context)
void **desc, size_t count, fi_addr_t src_addr, void *context)
{
struct usdf_ep *ep;
struct usd_recv_desc rxd;

Просмотреть файл

@ -43,24 +43,23 @@ int usdf_cm_msg_shutdown(struct fid_ep *ep, uint64_t flags);
/* fi_ops_msg for RC */
ssize_t usdf_msg_recv(struct fid_ep *ep, void *buf, size_t len, void *desc,
void *context);
fi_addr_t src_addr, void *context);
ssize_t usdf_msg_recvv(struct fid_ep *ep, const struct iovec *iov,
void **desc, size_t count, void *context);
ssize_t usdf_msg_recvfrom(struct fid_ep *ep, void *buf, size_t len,
void *desc, fi_addr_t src_addr, void *context);
void **desc, size_t count, fi_addr_t src_addr, void *context);
ssize_t usdf_msg_recvmsg(struct fid_ep *ep, const struct fi_msg *msg,
uint64_t flags);
ssize_t usdf_msg_send(struct fid_ep *ep, const void *buf, size_t len,
void *desc, void *context);
void *desc, fi_addr_t src_addr, void *context);
ssize_t usdf_msg_sendv(struct fid_ep *ep, const struct iovec *iov,
void **desc, size_t count, void *context);
void **desc, size_t count, fi_addr_t src_addr, void *context);
ssize_t usdf_msg_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
uint64_t flags);
ssize_t usdf_msg_senddata(struct fid_ep *ep, const void *buf, size_t len,
void *desc, uint64_t data, void *context);
void *desc, uint64_t data, fi_addr_t src_addr, void *context);
ssize_t usdf_msg_inject(struct fid_ep *ep, const void *buf, size_t len);
ssize_t usdf_msg_inject(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t src_addr);

Просмотреть файл

@ -306,7 +306,6 @@ struct fi_ops usdf_pep_ops = {
.size = sizeof(struct fi_ops),
.close = usdf_pep_close,
.bind = usdf_pep_bind,
.sync = fi_no_sync,
.control = fi_no_control,
.ops_open = fi_no_ops_open
};

Просмотреть файл

@ -40,7 +40,7 @@
*
*
*/
#ident "$Id$"
#ident "$Id: cq_desc.h 129574 2013-04-26 22:11:14Z rfaucett $"
#ifndef _CQ_DESC_H_
#define _CQ_DESC_H_

Просмотреть файл

@ -40,7 +40,7 @@
*
*
*/
#ident "$Id$"
#ident "$Id: cq_enet_desc.h 160468 2014-02-18 09:50:15Z gvaradar $"
#ifndef _CQ_ENET_DESC_H_
#define _CQ_ENET_DESC_H_

Просмотреть файл

@ -40,7 +40,7 @@
*
*
*/
#ident "$Id: kcompat.h 194404 2014-10-17 09:03:00Z gvaradar $"
#ident "$Id: kcompat.h 195274 2014-10-24 06:32:21Z gvaradar $"
#ifndef _KCOMPAT_H_
#define _KCOMPAT_H_
@ -203,17 +203,18 @@ static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys
}
#endif /*CONFIG_RFS_ACCEL*/
#if ((RHEL_RELEASE_CODE && RHEL_RELEASE_CODE == RHEL_RELEASE_VERSION(6, 5)))
#if ((RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 5)))
#define skb_get_rxhash(skb) (skb)->rxhash
#endif /*RHEL_RELEASE_VERSION == 6.5*/
#endif /*LINUX >= 3.3.0*/
#ifdef CONFIG_RFS_ACCEL
#if ((RHEL_RELEASE_CODE && RHEL_RELEASE_CODE == RHEL_RELEASE_VERSION(6, 5)))
#if ((RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 5) \
&& RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7, 0)))
#define enic_netdev_rmap(enic) netdev_extended(enic->netdev)->rfs_data.rx_cpu_rmap
#else
#define enic_netdev_rmap(enic) enic->netdev->rx_cpu_rmap
#endif /*RHEL_RELEASE_VERSION == 6.5*/
#endif /*RHEL_RELEASE_VERSION >= 6.5 && < 7.0*/
#endif /*CONFIG_RFS_ACCEL*/
#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 00))

Просмотреть файл

@ -82,7 +82,7 @@ static inline void pci_free_consistent( __attribute__ ((unused))
#define usd_err(args...) fprintf(stderr, args)
#define pr_err usd_err
#define pr_warning usd_err
#define pr_warning(args...)
#ifndef wmb
#define wmb() asm volatile("" ::: "memory")

Просмотреть файл

@ -39,6 +39,7 @@
*
*
*/
#include "config.h"
#include <errno.h>
#include <arpa/inet.h>

Просмотреть файл

@ -40,7 +40,7 @@
*
*
*/
#ident "$Id$"
#ident "$Id: rq_enet_desc.h 59839 2010-09-27 20:36:31Z roprabhu $"
#ifndef _RQ_ENET_DESC_H_
#define _RQ_ENET_DESC_H_

Просмотреть файл

@ -86,10 +86,6 @@ struct usd_device {
/* VFs we have associated with this device */
struct usd_vf *ud_vf_list;
/* CQ group management */
struct usd_cq_group *ud_free_cq_grp;
uint32_t ud_next_cq_grp_id;
/* PD for this device */
uint32_t ud_pd_handle;
@ -168,19 +164,10 @@ enum usd_qstate {
USD_QS_READY = (1 << 5)
};
struct usd_cq_group {
struct usd_device *cqg_dev;
uint32_t cqg_id;
uint16_t cqg_num_qp;
uint16_t cqg_refcnt;
struct usd_cq_group *cqg_next;
};
struct usd_cq_impl {
struct usd_cq ucq_cq;
struct usd_device *ucq_dev;
struct usd_vf *ucq_vf;
struct usd_cq_group *ucq_cq_group;
uint32_t ucq_state;

Просмотреть файл

@ -187,7 +187,7 @@ usd_dest_progress_dev(
}
static void
usd_dest_progress()
usd_dest_progress(void)
{
struct usd_device *dev;

Просмотреть файл

@ -66,4 +66,8 @@ struct usd_dest_req {
TAILQ_ENTRY(usd_dest_req) udr_link;
};
void usd_fill_udp_dest(struct usd_dest *dest, struct usd_device_attrs *dap,
uint32_t daddr_be, uint16_t dport_be);
#endif /* _USD_DEST_H_ */

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше