From 1a1be2efa0345c50f391fa774d610570e3b18a4a Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Tue, 10 Mar 2015 09:47:04 -0700 Subject: [PATCH] libfabric: update to Github upstream 7095f3dc --- .../common/libfabric/libfabric/Makefile.am | 251 +++++----- opal/mca/common/libfabric/libfabric/README | 2 +- .../common/libfabric/libfabric/include/fi.h | 4 +- .../libfabric/libfabric/include/fi_enosys.h | 6 +- .../libfabric/libfabric/include/rdma/fabric.h | 25 +- .../libfabric/include/rdma/fi_atomic.h | 1 - .../libfabric/include/rdma/fi_domain.h | 19 +- .../libfabric/include/rdma/fi_endpoint.h | 1 + .../libfabric/libfabric/include/rdma/fi_eq.h | 1 + .../libfabric/libfabric/include/rdma/fi_rma.h | 1 - .../libfabric/include/rdma/fi_tagged.h | 1 - .../libfabric/man/{ => man3}/fi_accept.3 | 0 .../libfabric/man/{ => man3}/fi_alias.3 | 0 .../man/{ => man3}/fi_atomic_valid.3 | 0 .../libfabric/man/{ => man3}/fi_atomicmsg.3 | 0 .../libfabric/man/{ => man3}/fi_atomicv.3 | 0 .../libfabric/man/{ => man3}/fi_av.3 | 8 +- .../libfabric/man/{ => man3}/fi_av_bind.3 | 0 .../libfabric/man/{ => man3}/fi_av_insert.3 | 0 .../man/{ => man3}/fi_av_insertsvc.3 | 0 .../libfabric/man/{ => man3}/fi_av_lookup.3 | 0 .../libfabric/man/{ => man3}/fi_av_open.3 | 0 .../libfabric/man/{ => man3}/fi_av_remove.3 | 0 .../libfabric/man/{ => man3}/fi_av_straddr.3 | 0 .../libfabric/man/{ => man3}/fi_cancel.3 | 0 .../libfabric/man/{ => man3}/fi_close.3 | 0 .../libfabric/man/{ => man3}/fi_cm.3 | 52 +- .../libfabric/man/{ => man3}/fi_cntr.3 | 2 +- .../libfabric/man/{ => man3}/fi_cntr_add.3 | 0 .../libfabric/man/{ => man3}/fi_cntr_open.3 | 0 .../libfabric/man/{ => man3}/fi_cntr_read.3 | 0 .../libfabric/man/{ => man3}/fi_cntr_set.3 | 0 .../libfabric/man/{ => man3}/fi_cntr_wait.3 | 0 .../man/{ => man3}/fi_compare_atomic.3 | 0 .../man/{ => man3}/fi_compare_atomic_valid.3 | 0 .../man/{ => man3}/fi_compare_atomicmsg.3 | 0 .../man/{ => man3}/fi_compare_atomicv.3 | 0 .../libfabric/man/{ => man3}/fi_connect.3 | 0 
.../libfabric/man/{ => man3}/fi_control.3 | 2 +- .../libfabric/man/{ => man3}/fi_cq.3 | 38 +- .../libfabric/man/{ => man3}/fi_cq_open.3 | 0 .../libfabric/man/{ => man3}/fi_cq_read.3 | 0 .../libfabric/man/{ => man3}/fi_cq_readerr.3 | 0 .../libfabric/man/{ => man3}/fi_cq_readfrom.3 | 0 .../libfabric/man/{ => man3}/fi_cq_sread.3 | 0 .../man/{ => man3}/fi_cq_sreadfrom.3 | 0 .../libfabric/man/{ => man3}/fi_cq_strerror.3 | 0 .../libfabric/man/{ => man3}/fi_cq_write.3 | 0 .../libfabric/man/{ => man3}/fi_domain.3 | 43 +- .../libfabric/man/{ => man3}/fi_domain_bind.3 | 0 .../man/{ => man3}/fi_domain_query.3 | 0 .../libfabric/man/{ => man3}/fi_dupinfo.3 | 0 .../libfabric/man/{ => man3}/fi_enable.3 | 0 .../libfabric/man/{ => man3}/fi_endpoint.3 | 89 ++-- .../libfabric/man/{ => man3}/fi_ep_bind.3 | 0 .../libfabric/man/{ => man3}/fi_eq.3 | 55 ++- .../libfabric/man/{ => man3}/fi_eq_open.3 | 0 .../libfabric/man/{ => man3}/fi_eq_read.3 | 0 .../libfabric/man/{ => man3}/fi_eq_readerr.3 | 0 .../libfabric/man/{ => man3}/fi_eq_sread.3 | 0 .../libfabric/man/{ => man3}/fi_eq_strerror.3 | 0 .../libfabric/man/{ => man3}/fi_eq_write.3 | 0 .../libfabric/man/{ => man3}/fi_errno.3 | 2 +- .../libfabric/man/{ => man3}/fi_fabric.3 | 2 +- .../man/{ => man3}/fi_fetch_atomic.3 | 0 .../man/{ => man3}/fi_fetch_atomic_valid.3 | 0 .../man/{ => man3}/fi_fetch_atomicmsg.3 | 0 .../man/{ => man3}/fi_fetch_atomicv.3 | 0 .../libfabric/man/{ => man3}/fi_freeinfo.3 | 0 .../libfabric/man/{ => man3}/fi_getinfo.3 | 125 ++--- .../libfabric/man/{ => man3}/fi_getname.3 | 0 .../libfabric/man/{ => man3}/fi_getopt.3 | 0 .../libfabric/man/{ => man3}/fi_getpeer.3 | 0 .../libfabric/man/{ => man3}/fi_inject.3 | 0 .../man/{ => man3}/fi_inject_atomic.3 | 0 .../man/{ => man3}/fi_inject_write.3 | 0 .../man/{ => man3}/fi_inject_writedata.3 | 0 .../libfabric/man/{ => man3}/fi_injectdata.3 | 0 .../libfabric/man/{ => man3}/fi_join.3 | 0 .../libfabric/man/{ => man3}/fi_leave.3 | 0 .../libfabric/man/{ => man3}/fi_listen.3 | 0 
.../libfabric/man/{ => man3}/fi_mr.3 | 2 +- .../libfabric/man/{ => man3}/fi_mr_bind.3 | 0 .../libfabric/man/{ => man3}/fi_mr_desc.3 | 0 .../libfabric/man/{ => man3}/fi_mr_key.3 | 0 .../libfabric/man/{ => man3}/fi_mr_reg.3 | 0 .../libfabric/man/{ => man3}/fi_mr_regattr.3 | 0 .../libfabric/man/{ => man3}/fi_mr_regv.3 | 0 .../libfabric/man/{ => man3}/fi_msg.3 | 6 +- .../libfabric/man/{ => man3}/fi_open.3 | 0 .../libfabric/man/{ => man3}/fi_passive_ep.3 | 0 .../libfabric/man/{ => man3}/fi_poll.3 | 2 +- .../libfabric/man/{ => man3}/fi_poll_add.3 | 0 .../libfabric/man/{ => man3}/fi_poll_del.3 | 0 .../libfabric/man/{ => man3}/fi_poll_open.3 | 0 .../libfabric/man/{ => man3}/fi_read.3 | 0 .../libfabric/man/{ => man3}/fi_readmsg.3 | 0 .../libfabric/man/{ => man3}/fi_readv.3 | 0 .../libfabric/man/{ => man3}/fi_recv.3 | 0 .../libfabric/man/{ => man3}/fi_recvmsg.3 | 0 .../libfabric/man/{ => man3}/fi_recvv.3 | 0 .../libfabric/man/{ => man3}/fi_reject.3 | 0 .../libfabric/man/{ => man3}/fi_rma.3 | 4 +- .../libfabric/man/{ => man3}/fi_rx_addr.3 | 0 .../man/{ => man3}/fi_rx_size_left.3 | 0 .../libfabric/man/{ => man3}/fi_send.3 | 0 .../libfabric/man/{ => man3}/fi_senddata.3 | 0 .../libfabric/man/{ => man3}/fi_sendmsg.3 | 0 .../libfabric/man/{ => man3}/fi_sendv.3 | 0 .../libfabric/man/{ => man3}/fi_setopt.3 | 0 .../libfabric/man/{ => man3}/fi_shutdown.3 | 0 .../libfabric/man/{ => man3}/fi_strerror.3 | 0 .../libfabric/man/{ => man3}/fi_tagged.3 | 10 +- .../libfabric/man/{ => man3}/fi_tinject.3 | 0 .../libfabric/man/{ => man3}/fi_tinjectdata.3 | 0 .../libfabric/man/{ => man3}/fi_tostr.3 | 0 .../libfabric/man/{ => man3}/fi_trecv.3 | 0 .../libfabric/man/{ => man3}/fi_trecvmsg.3 | 0 .../libfabric/man/{ => man3}/fi_trecvv.3 | 0 .../libfabric/man/{ => man3}/fi_trigger.3 | 2 +- .../libfabric/man/{ => man3}/fi_tsearch.3 | 0 .../libfabric/man/{ => man3}/fi_tsend.3 | 0 .../libfabric/man/{ => man3}/fi_tsenddata.3 | 0 .../libfabric/man/{ => man3}/fi_tsendmsg.3 | 0 .../libfabric/man/{ => 
man3}/fi_tsendv.3 | 0 .../man/{ => man3}/fi_tx_size_left.3 | 0 .../libfabric/man/{ => man3}/fi_version.3 | 2 +- .../libfabric/man/{ => man3}/fi_wait.3 | 0 .../libfabric/man/{ => man3}/fi_wait_open.3 | 0 .../libfabric/man/{ => man3}/fi_write.3 | 0 .../libfabric/man/{ => man3}/fi_writedata.3 | 0 .../libfabric/man/{ => man3}/fi_writemsg.3 | 0 .../libfabric/man/{ => man3}/fi_writev.3 | 0 .../libfabric/man/{ => man7}/fabric.7 | 2 +- .../libfabric/man/{ => man7}/fi_direct.7 | 2 +- .../libfabric/libfabric/prov/psm/src/psmx.h | 16 +- .../libfabric/prov/psm/src/psmx_atomic.c | 41 +- .../libfabric/prov/psm/src/psmx_av.c | 14 +- .../libfabric/prov/psm/src/psmx_cntr.c | 9 +- .../libfabric/prov/psm/src/psmx_cq.c | 54 +- .../libfabric/prov/psm/src/psmx_domain.c | 38 +- .../libfabric/prov/psm/src/psmx_ep.c | 16 +- .../libfabric/prov/psm/src/psmx_init.c | 182 ++++--- .../libfabric/prov/psm/src/psmx_mr.c | 24 +- .../libfabric/prov/psm/src/psmx_msg.c | 4 +- .../libfabric/prov/psm/src/psmx_msg2.c | 12 +- .../libfabric/prov/psm/src/psmx_rma.c | 21 +- .../libfabric/prov/psm/src/psmx_tagged.c | 16 +- .../libfabric/prov/psm/src/psmx_util.c | 44 +- .../libfabric/prov/psm/src/psmx_wait.c | 7 +- .../libfabric/prov/sockets/src/sock.h | 52 +- .../libfabric/prov/sockets/src/sock_atomic.c | 1 + .../libfabric/prov/sockets/src/sock_cntr.c | 94 ++-- .../libfabric/prov/sockets/src/sock_cq.c | 69 +-- .../libfabric/prov/sockets/src/sock_dom.c | 23 +- .../libfabric/prov/sockets/src/sock_ep.c | 136 ++++-- .../prov/sockets/src/sock_ep_dgram.c | 44 +- .../libfabric/prov/sockets/src/sock_ep_msg.c | 462 ++++++++++++------ .../libfabric/prov/sockets/src/sock_ep_rdm.c | 127 +++-- .../libfabric/prov/sockets/src/sock_eq.c | 16 +- .../libfabric/prov/sockets/src/sock_fabric.c | 9 +- .../libfabric/prov/sockets/src/sock_msg.c | 1 + .../prov/sockets/src/sock_progress.c | 29 +- .../libfabric/prov/sockets/src/sock_rma.c | 1 + .../libfabric/prov/sockets/src/sock_wait.c | 5 +- 
.../libfabric/libfabric/prov/usnic/src/usdf.h | 2 +- .../libfabric/prov/usnic/src/usdf_av.c | 2 + .../libfabric/prov/usnic/src/usdf_cm.c | 1 + .../libfabric/prov/usnic/src/usdf_endpoint.c | 2 +- .../libfabric/prov/usnic/src/usdf_ep_msg.c | 9 +- .../libfabric/prov/usnic/src/usdf_fabric.c | 15 +- .../libfabric/prov/usnic/src/usdf_mem.c | 10 +- .../libfabric/prov/usnic/src/usdf_pep.c | 8 +- .../libfabric/prov/verbs/src/fi_verbs.c | 229 +++++---- .../common/libfabric/libfabric/src/common.c | 32 +- .../common/libfabric/libfabric/src/enosys.c | 6 +- .../common/libfabric/libfabric/src/fabric.c | 29 +- .../common/libfabric/libfabric/src/fi_tostr.c | 47 +- 178 files changed, 1717 insertions(+), 1004 deletions(-) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_accept.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_alias.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_atomic_valid.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_atomicmsg.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_atomicv.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_av.3 (98%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_av_bind.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_av_insert.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_av_insertsvc.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_av_lookup.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_av_open.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_av_remove.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_av_straddr.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_cancel.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_close.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_cm.3 (75%) rename 
opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_cntr.3 (99%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_cntr_add.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_cntr_open.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_cntr_read.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_cntr_set.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_cntr_wait.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_compare_atomic.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_compare_atomic_valid.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_compare_atomicmsg.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_compare_atomicv.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_connect.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_control.3 (97%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_cq.3 (91%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_cq_open.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_cq_read.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_cq_readerr.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_cq_readfrom.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_cq_sread.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_cq_sreadfrom.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_cq_strerror.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_cq_write.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_domain.3 (92%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_domain_bind.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_domain_query.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_dupinfo.3 
(100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_enable.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_endpoint.3 (94%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_ep_bind.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_eq.3 (88%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_eq_open.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_eq_read.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_eq_readerr.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_eq_sread.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_eq_strerror.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_eq_write.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_errno.3 (99%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_fabric.3 (99%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_fetch_atomic.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_fetch_atomic_valid.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_fetch_atomicmsg.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_fetch_atomicv.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_freeinfo.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_getinfo.3 (87%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_getname.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_getopt.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_getpeer.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_inject.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_inject_atomic.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_inject_write.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_inject_writedata.3 
(100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_injectdata.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_join.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_leave.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_listen.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_mr.3 (99%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_mr_bind.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_mr_desc.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_mr_key.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_mr_reg.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_mr_regattr.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_mr_regv.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_msg.3 (98%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_open.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_passive_ep.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_poll.3 (99%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_poll_add.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_poll_del.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_poll_open.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_read.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_readmsg.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_readv.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_recv.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_recvmsg.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_recvv.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_reject.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_rma.3 
(98%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_rx_addr.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_rx_size_left.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_send.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_senddata.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_sendmsg.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_sendv.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_setopt.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_shutdown.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_strerror.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_tagged.3 (97%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_tinject.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_tinjectdata.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_tostr.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_trecv.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_trecvmsg.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_trecvv.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_trigger.3 (99%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_tsearch.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_tsend.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_tsenddata.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_tsendmsg.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_tsendv.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_tx_size_left.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_version.3 (96%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_wait.3 (100%) rename 
opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_wait_open.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_write.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_writedata.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_writemsg.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man3}/fi_writev.3 (100%) rename opal/mca/common/libfabric/libfabric/man/{ => man7}/fabric.7 (99%) rename opal/mca/common/libfabric/libfabric/man/{ => man7}/fi_direct.7 (99%) diff --git a/opal/mca/common/libfabric/libfabric/Makefile.am b/opal/mca/common/libfabric/libfabric/Makefile.am index 3ed403e551..fe5ec00f72 100644 --- a/opal/mca/common/libfabric/libfabric/Makefile.am +++ b/opal/mca/common/libfabric/libfabric/Makefile.am @@ -309,138 +309,139 @@ nodist_rdmainclude_HEADERS = \ endif HAVE_DIRECT real_man_pages = \ - man/fabric.7 \ - man/fi_av.3 \ - man/fi_cm.3 \ - man/fi_cntr.3 \ - man/fi_control.3 \ - man/fi_cq.3 \ - man/fi_direct.7 \ - man/fi_domain.3 \ - man/fi_endpoint.3 \ - man/fi_errno.3 \ - man/fi_eq.3 \ - man/fi_fabric.3 \ - man/fi_getinfo.3 \ - man/fi_mr.3 \ - man/fi_msg.3 \ - man/fi_poll.3 \ - man/fi_rma.3 \ - man/fi_tagged.3 \ - man/fi_trigger.3 \ - man/fi_version.3 + man/man3/fi_av.3 \ + man/man3/fi_cm.3 \ + man/man3/fi_cntr.3 \ + man/man3/fi_control.3 \ + man/man3/fi_cq.3 \ + man/man3/fi_domain.3 \ + man/man3/fi_endpoint.3 \ + man/man3/fi_errno.3 \ + man/man3/fi_eq.3 \ + man/man3/fi_fabric.3 \ + man/man3/fi_getinfo.3 \ + man/man3/fi_mr.3 \ + man/man3/fi_msg.3 \ + man/man3/fi_poll.3 \ + man/man3/fi_rma.3 \ + man/man3/fi_tagged.3 \ + man/man3/fi_trigger.3 \ + man/man3/fi_version.3 \ + man/man7/fabric.7 \ + man/man7/fi_direct.7 dummy_man_pages = \ - man/fi_accept.3 \ - man/fi_alias.3 \ - man/fi_atomic_valid.3 \ - man/fi_atomicmsg.3 \ - man/fi_atomicv.3 \ - man/fi_av_bind.3 \ - man/fi_av_insert.3 \ - man/fi_av_insertsvc.3 \ - man/fi_av_lookup.3 \ - man/fi_av_open.3 \ - man/fi_av_remove.3 \ - 
man/fi_av_straddr.3 \ - man/fi_cancel.3 \ - man/fi_close.3 \ - man/fi_cntr_add.3 \ - man/fi_cntr_open.3 \ - man/fi_cntr_read.3 \ - man/fi_cntr_set.3 \ - man/fi_cntr_wait.3 \ - man/fi_compare_atomic.3 \ - man/fi_compare_atomic_valid.3 \ - man/fi_compare_atomicmsg.3 \ - man/fi_compare_atomicv.3 \ - man/fi_connect.3 \ - man/fi_cq_open.3 \ - man/fi_cq_read.3 \ - man/fi_cq_readerr.3 \ - man/fi_cq_readfrom.3 \ - man/fi_cq_sread.3 \ - man/fi_cq_sreadfrom.3 \ - man/fi_cq_strerror.3 \ - man/fi_cq_write.3 \ - man/fi_domain_bind.3 \ - man/fi_domain_query.3 \ - man/fi_dupinfo.3 \ - man/fi_enable.3 \ - man/fi_ep_bind.3 \ - man/fi_eq_open.3 \ - man/fi_eq_read.3 \ - man/fi_eq_readerr.3 \ - man/fi_eq_sread.3 \ - man/fi_eq_strerror.3 \ - man/fi_eq_write.3 \ - man/fi_fetch_atomic.3 \ - man/fi_fetch_atomic_valid.3 \ - man/fi_fetch_atomicmsg.3 \ - man/fi_fetch_atomicv.3 \ - man/fi_freeinfo.3 \ - man/fi_getname.3 \ - man/fi_getopt.3 \ - man/fi_getpeer.3 \ - man/fi_inject.3 \ - man/fi_injectdata.3 \ - man/fi_inject_atomic.3 \ - man/fi_inject_write.3 \ - man/fi_inject_writedata.3 \ - man/fi_join.3 \ - man/fi_leave.3 \ - man/fi_listen.3 \ - man/fi_mr_bind.3 \ - man/fi_mr_desc.3 \ - man/fi_mr_key.3 \ - man/fi_mr_reg.3 \ - man/fi_mr_regattr.3 \ - man/fi_mr_regv.3 \ - man/fi_open.3 \ - man/fi_passive_ep.3 \ - man/fi_poll_add.3 \ - man/fi_poll_del.3 \ - man/fi_poll_open.3 \ - man/fi_read.3 \ - man/fi_readmsg.3 \ - man/fi_readv.3 \ - man/fi_recv.3 \ - man/fi_recvmsg.3 \ - man/fi_recvv.3 \ - man/fi_reject.3 \ - man/fi_rx_addr.3 \ - man/fi_rx_size_left.3 \ - man/fi_send.3 \ - man/fi_senddata.3 \ - man/fi_sendmsg.3 \ - man/fi_sendv.3 \ - man/fi_setopt.3 \ - man/fi_shutdown.3 \ - man/fi_strerror.3 \ - man/fi_tinject.3 \ - man/fi_tinjectdata.3 \ - man/fi_tostr.3 \ - man/fi_trecv.3 \ - man/fi_trecvmsg.3 \ - man/fi_trecvv.3 \ - man/fi_tsearch.3 \ - man/fi_tsend.3 \ - man/fi_tsenddata.3 \ - man/fi_tsendmsg.3 \ - man/fi_tsendv.3 \ - man/fi_tx_size_left.3 \ - man/fi_wait.3 \ - man/fi_wait_open.3 \ - 
man/fi_write.3 \ - man/fi_writedata.3 \ - man/fi_writemsg.3 \ - man/fi_writev.3 + man/man3/fi_accept.3 \ + man/man3/fi_alias.3 \ + man/man3/fi_atomic_valid.3 \ + man/man3/fi_atomicmsg.3 \ + man/man3/fi_atomicv.3 \ + man/man3/fi_av_bind.3 \ + man/man3/fi_av_insert.3 \ + man/man3/fi_av_insertsvc.3 \ + man/man3/fi_av_lookup.3 \ + man/man3/fi_av_open.3 \ + man/man3/fi_av_remove.3 \ + man/man3/fi_av_straddr.3 \ + man/man3/fi_cancel.3 \ + man/man3/fi_close.3 \ + man/man3/fi_cntr_add.3 \ + man/man3/fi_cntr_open.3 \ + man/man3/fi_cntr_read.3 \ + man/man3/fi_cntr_set.3 \ + man/man3/fi_cntr_wait.3 \ + man/man3/fi_compare_atomic.3 \ + man/man3/fi_compare_atomic_valid.3 \ + man/man3/fi_compare_atomicmsg.3 \ + man/man3/fi_compare_atomicv.3 \ + man/man3/fi_connect.3 \ + man/man3/fi_cq_open.3 \ + man/man3/fi_cq_read.3 \ + man/man3/fi_cq_readerr.3 \ + man/man3/fi_cq_readfrom.3 \ + man/man3/fi_cq_sread.3 \ + man/man3/fi_cq_sreadfrom.3 \ + man/man3/fi_cq_strerror.3 \ + man/man3/fi_cq_write.3 \ + man/man3/fi_domain_bind.3 \ + man/man3/fi_domain_query.3 \ + man/man3/fi_dupinfo.3 \ + man/man3/fi_enable.3 \ + man/man3/fi_ep_bind.3 \ + man/man3/fi_eq_open.3 \ + man/man3/fi_eq_read.3 \ + man/man3/fi_eq_readerr.3 \ + man/man3/fi_eq_sread.3 \ + man/man3/fi_eq_strerror.3 \ + man/man3/fi_eq_write.3 \ + man/man3/fi_fetch_atomic.3 \ + man/man3/fi_fetch_atomic_valid.3 \ + man/man3/fi_fetch_atomicmsg.3 \ + man/man3/fi_fetch_atomicv.3 \ + man/man3/fi_freeinfo.3 \ + man/man3/fi_getname.3 \ + man/man3/fi_getopt.3 \ + man/man3/fi_getpeer.3 \ + man/man3/fi_inject.3 \ + man/man3/fi_injectdata.3 \ + man/man3/fi_inject_atomic.3 \ + man/man3/fi_inject_write.3 \ + man/man3/fi_inject_writedata.3 \ + man/man3/fi_join.3 \ + man/man3/fi_leave.3 \ + man/man3/fi_listen.3 \ + man/man3/fi_mr_bind.3 \ + man/man3/fi_mr_desc.3 \ + man/man3/fi_mr_key.3 \ + man/man3/fi_mr_reg.3 \ + man/man3/fi_mr_regattr.3 \ + man/man3/fi_mr_regv.3 \ + man/man3/fi_open.3 \ + man/man3/fi_passive_ep.3 \ + man/man3/fi_poll_add.3 \ + 
man/man3/fi_poll_del.3 \ + man/man3/fi_poll_open.3 \ + man/man3/fi_read.3 \ + man/man3/fi_readmsg.3 \ + man/man3/fi_readv.3 \ + man/man3/fi_recv.3 \ + man/man3/fi_recvmsg.3 \ + man/man3/fi_recvv.3 \ + man/man3/fi_reject.3 \ + man/man3/fi_rx_addr.3 \ + man/man3/fi_rx_size_left.3 \ + man/man3/fi_send.3 \ + man/man3/fi_senddata.3 \ + man/man3/fi_sendmsg.3 \ + man/man3/fi_sendv.3 \ + man/man3/fi_setopt.3 \ + man/man3/fi_shutdown.3 \ + man/man3/fi_strerror.3 \ + man/man3/fi_tinject.3 \ + man/man3/fi_tinjectdata.3 \ + man/man3/fi_tostr.3 \ + man/man3/fi_trecv.3 \ + man/man3/fi_trecvmsg.3 \ + man/man3/fi_trecvv.3 \ + man/man3/fi_tsearch.3 \ + man/man3/fi_tsend.3 \ + man/man3/fi_tsenddata.3 \ + man/man3/fi_tsendmsg.3 \ + man/man3/fi_tsendv.3 \ + man/man3/fi_tx_size_left.3 \ + man/man3/fi_wait.3 \ + man/man3/fi_wait_open.3 \ + man/man3/fi_write.3 \ + man/man3/fi_writedata.3 \ + man/man3/fi_writemsg.3 \ + man/man3/fi_writev.3 man_MANS = $(real_man_pages) $(dummy_man_pages) nroff: @for file in $(real_man_pages); do \ - config/md2nroff.pl --source=$$file.md; \ + source=`echo $$file | sed -e 's@/man[0-9]@@'`; \ + config/md2nroff.pl --source=$$source.md; \ done EXTRA_DIST = libfabric.map libfabric.spec.in config/distscript.pl $(man_MANS) diff --git a/opal/mca/common/libfabric/libfabric/README b/opal/mca/common/libfabric/libfabric/README index 2d0c88d576..a9d1b13e1b 100644 --- a/opal/mca/common/libfabric/libfabric/README +++ b/opal/mca/common/libfabric/libfabric/README @@ -1,7 +1,7 @@ This README is for userspace RDMA fabric library. 
Version Libfabric v1.0.0rc3 -Released on 2015-02-20 +Released on 2015-03-10 Building ======== diff --git a/opal/mca/common/libfabric/libfabric/include/fi.h b/opal/mca/common/libfabric/libfabric/include/fi.h index 3fc5fd53fc..581171df42 100644 --- a/opal/mca/common/libfabric/libfabric/include/fi.h +++ b/opal/mca/common/libfabric/libfabric/include/fi.h @@ -197,8 +197,6 @@ int fi_read_file(const char *dir, const char *file, char *buf, size_t size); int fi_poll_fd(int fd, int timeout); int fi_wait_cond(pthread_cond_t *cond, pthread_mutex_t *mut, int timeout); -struct fi_info *fi_allocinfo_internal(void); - int fi_sockaddr_len(struct sockaddr *addr); size_t fi_datatype_size(enum fi_datatype datatype); uint64_t fi_tag_bits(uint64_t mem_tag_format); @@ -209,6 +207,8 @@ int fi_recv_allowed(uint64_t caps); int fi_rma_initiate_allowed(uint64_t caps); int fi_rma_target_allowed(uint64_t caps); +uint64_t fi_gettime_ms(); + #define RDMA_CONF_DIR SYSCONFDIR "/" RDMADIR #define FI_CONF_DIR RDMA_CONF_DIR "/fabric" diff --git a/opal/mca/common/libfabric/libfabric/include/fi_enosys.h b/opal/mca/common/libfabric/libfabric/include/fi_enosys.h index 3bf4cd0e20..9a44f61612 100644 --- a/opal/mca/common/libfabric/libfabric/include/fi_enosys.h +++ b/opal/mca/common/libfabric/libfabric/include/fi_enosys.h @@ -216,14 +216,14 @@ static struct fi_ops_mr X = { .regattr = fi_no_mr_regattr, }; */ -int fi_no_mr_reg(struct fid_domain *domain, const void *buf, size_t len, +int fi_no_mr_reg(struct fid *fid, const void *buf, size_t len, uint64_t access, uint64_t offset, uint64_t requested_key, uint64_t flags, struct fid_mr **mr, void *context); -int fi_no_mr_regv(struct fid_domain *domain, const struct iovec *iov, +int fi_no_mr_regv(struct fid *fid, const struct iovec *iov, size_t count, uint64_t access, uint64_t offset, uint64_t requested_key, uint64_t flags, struct fid_mr **mr, void *context); -int fi_no_mr_regattr(struct fid_domain *domain, const struct fi_mr_attr *attr, +int fi_no_mr_regattr(struct 
fid *fid, const struct fi_mr_attr *attr, uint64_t flags, struct fid_mr **mr); /* diff --git a/opal/mca/common/libfabric/libfabric/include/rdma/fabric.h b/opal/mca/common/libfabric/libfabric/include/rdma/fabric.h index fb87c32634..fc4d79089b 100644 --- a/opal/mca/common/libfabric/libfabric/include/rdma/fabric.h +++ b/opal/mca/common/libfabric/libfabric/include/rdma/fabric.h @@ -35,7 +35,6 @@ #include #include -#include #include #ifdef __cplusplus @@ -48,6 +47,9 @@ extern "C" { ((type *) ((char *)ptr - offsetof(type, field))) #endif +#define FI_DEFINE_HANDLE(name) struct name##_s { int dummy; }; \ + typedef struct name##_s *name + enum { FI_MAJOR_VERSION = 1, FI_MINOR_VERSION = 0, @@ -99,7 +101,6 @@ typedef struct fid *fid_t; #define FI_ATOMICS FI_ATOMIC #define FI_DYNAMIC_MR (1ULL << 7) #define FI_NAMED_RX_CTX (1ULL << 8) -#define FI_BUFFERED_RECV (1ULL << 9) #define FI_DIRECTED_RECV (1ULL << 10) /* @@ -158,7 +159,13 @@ enum { #define FI_ADDR_NOTAVAIL UINT64_MAX #define FI_SHARED_CONTEXT UINT64_MAX typedef uint64_t fi_addr_t; -typedef void * fi_connreq_t; +FI_DEFINE_HANDLE(fi_connreq_t); + +enum fi_av_type { + FI_AV_UNSPEC, + FI_AV_MAP, + FI_AV_TABLE +}; enum fi_progress { FI_PROGRESS_UNSPEC, @@ -248,11 +255,10 @@ struct fi_rx_attr { }; struct fi_ep_attr { + enum fi_ep_type type; uint32_t protocol; uint32_t protocol_version; size_t max_msg_size; - size_t inject_size; - size_t total_buffered_recv; size_t msg_prefix_size; size_t max_order_raw_size; size_t max_order_war_size; @@ -271,6 +277,7 @@ struct fi_domain_attr { enum fi_progress control_progress; enum fi_progress data_progress; enum fi_resource_mgmt resource_mgmt; + enum fi_av_type av_type; size_t mr_key_size; size_t cq_data_size; size_t cq_cnt; @@ -292,7 +299,6 @@ struct fi_info { struct fi_info *next; uint64_t caps; uint64_t mode; - enum fi_ep_type ep_type; uint32_t addr_format; size_t src_addrlen; size_t dest_addrlen; @@ -353,6 +359,11 @@ int fi_getinfo(uint32_t version, const char *node, const char *service, 
void fi_freeinfo(struct fi_info *info); struct fi_info *fi_dupinfo(const struct fi_info *info); +static inline struct fi_info *fi_allocinfo(void) +{ + return fi_dupinfo(NULL); +} + struct fi_ops_fabric { size_t size; int (*domain)(struct fid_fabric *fabric, struct fi_info *info, @@ -441,6 +452,8 @@ enum fi_type { FI_TYPE_ATOMIC_TYPE, FI_TYPE_ATOMIC_OP, FI_TYPE_VERSION, + FI_TYPE_EQ_EVENT, + FI_TYPE_CQ_EVENT_FLAGS, }; char *fi_tostr(const void *data, enum fi_type datatype); diff --git a/opal/mca/common/libfabric/libfabric/include/rdma/fi_atomic.h b/opal/mca/common/libfabric/libfabric/include/rdma/fi_atomic.h index 7a1ecc5d8e..005481b02f 100644 --- a/opal/mca/common/libfabric/libfabric/include/rdma/fi_atomic.h +++ b/opal/mca/common/libfabric/libfabric/include/rdma/fi_atomic.h @@ -33,7 +33,6 @@ #ifndef _FI_ATOMIC_H_ #define _FI_ATOMIC_H_ -#include #include #include #include diff --git a/opal/mca/common/libfabric/libfabric/include/rdma/fi_domain.h b/opal/mca/common/libfabric/libfabric/include/rdma/fi_domain.h index 18fc22eae4..5702a2451d 100644 --- a/opal/mca/common/libfabric/libfabric/include/rdma/fi_domain.h +++ b/opal/mca/common/libfabric/libfabric/include/rdma/fi_domain.h @@ -47,11 +47,6 @@ extern "C" { * Maps and stores transport/network addresses. 
*/ -enum fi_av_type { - FI_AV_MAP, - FI_AV_TABLE -}; - struct fi_av_attr { enum fi_av_type type; int rx_ctx_bits; @@ -140,14 +135,14 @@ struct fi_ops_domain { struct fi_ops_mr { size_t size; - int (*reg)(struct fid_domain *domain, const void *buf, size_t len, + int (*reg)(struct fid *fid, const void *buf, size_t len, uint64_t access, uint64_t offset, uint64_t requested_key, uint64_t flags, struct fid_mr **mr, void *context); - int (*regv)(struct fid_domain *domain, const struct iovec *iov, + int (*regv)(struct fid *fid, const struct iovec *iov, size_t count, uint64_t access, uint64_t offset, uint64_t requested_key, uint64_t flags, struct fid_mr **mr, void *context); - int (*regattr)(struct fid_domain *domain, const struct fi_mr_attr *attr, + int (*regattr)(struct fid *fid, const struct fi_mr_attr *attr, uint64_t flags, struct fid_mr **mr); }; @@ -209,7 +204,7 @@ fi_mr_reg(struct fid_domain *domain, const void *buf, size_t len, uint64_t access, uint64_t offset, uint64_t requested_key, uint64_t flags, struct fid_mr **mr, void *context) { - return domain->mr->reg(domain, buf, len, access, offset, + return domain->mr->reg(&domain->fid, buf, len, access, offset, requested_key, flags, mr, context); } @@ -276,6 +271,12 @@ fi_av_lookup(struct fid_av *av, fi_addr_t fi_addr, void *addr, size_t *addrlen) return av->ops->lookup(av, fi_addr, addr, addrlen); } +static inline const char * +fi_av_straddr(struct fid_av *av, const void *addr, char *buf, size_t *len) +{ + return av->ops->straddr(av, addr, buf, len); +} + static inline fi_addr_t fi_rx_addr(fi_addr_t fi_addr, int rx_index, int rx_ctx_bits) { diff --git a/opal/mca/common/libfabric/libfabric/include/rdma/fi_endpoint.h b/opal/mca/common/libfabric/libfabric/include/rdma/fi_endpoint.h index 27203d3d3d..4b30e11ee3 100644 --- a/opal/mca/common/libfabric/libfabric/include/rdma/fi_endpoint.h +++ b/opal/mca/common/libfabric/libfabric/include/rdma/fi_endpoint.h @@ -61,6 +61,7 @@ enum { /* FI_OPT_ENDPOINT option names */ enum { 
FI_OPT_MIN_MULTI_RECV, /* size_t */ + FI_OPT_CM_DATA_SIZE, /* size_t */ }; struct fi_ops_ep { diff --git a/opal/mca/common/libfabric/libfabric/include/rdma/fi_eq.h b/opal/mca/common/libfabric/libfabric/include/rdma/fi_eq.h index e3fc897b60..3e6ce52194 100644 --- a/opal/mca/common/libfabric/libfabric/include/rdma/fi_eq.h +++ b/opal/mca/common/libfabric/libfabric/include/rdma/fi_eq.h @@ -137,6 +137,7 @@ struct fi_eq_err_entry { int prov_errno; /* err_data is available until the next time the CQ is read */ void *err_data; + size_t err_data_size; }; struct fi_eq_cm_entry { diff --git a/opal/mca/common/libfabric/libfabric/include/rdma/fi_rma.h b/opal/mca/common/libfabric/libfabric/include/rdma/fi_rma.h index aa2958414e..939241f2a9 100644 --- a/opal/mca/common/libfabric/libfabric/include/rdma/fi_rma.h +++ b/opal/mca/common/libfabric/libfabric/include/rdma/fi_rma.h @@ -33,7 +33,6 @@ #ifndef _FI_RMA_H_ #define _FI_RMA_H_ -#include #include #include diff --git a/opal/mca/common/libfabric/libfabric/include/rdma/fi_tagged.h b/opal/mca/common/libfabric/libfabric/include/rdma/fi_tagged.h index f6c35f98d9..3c25792e13 100644 --- a/opal/mca/common/libfabric/libfabric/include/rdma/fi_tagged.h +++ b/opal/mca/common/libfabric/libfabric/include/rdma/fi_tagged.h @@ -33,7 +33,6 @@ #ifndef _FI_TAGGED_H_ #define _FI_TAGGED_H_ -#include #include #include diff --git a/opal/mca/common/libfabric/libfabric/man/fi_accept.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_accept.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_accept.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_accept.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_alias.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_alias.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_alias.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_alias.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_atomic_valid.3 
b/opal/mca/common/libfabric/libfabric/man/man3/fi_atomic_valid.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_atomic_valid.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_atomic_valid.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_atomicmsg.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_atomicmsg.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_atomicmsg.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_atomicmsg.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_atomicv.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_atomicv.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_atomicv.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_atomicv.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_av.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_av.3 similarity index 98% rename from opal/mca/common/libfabric/libfabric/man/fi_av.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_av.3 index e730835191..ec56fe8fc8 100644 --- a/opal/mca/common/libfabric/libfabric/man/fi_av.3 +++ b/opal/mca/common/libfabric/libfabric/man/man3/fi_av.3 @@ -1,4 +1,4 @@ -.TH fi_av 3 "2015\-02\-12" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc3" +.TH fi_av 3 "2015\-03\-09" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP fi_av - Address vector operations @@ -47,7 +47,7 @@ fi_addr_t\ fi_rx_addr(fi_addr_t\ fi_addr,\ int\ rx_index, \ \ \ \ \ \ int\ rx_ctx_bits); const\ char\ *\ fi_av_straddr(struct\ fid_av\ *av,\ const\ void\ *addr, -\ \ \ \ \ \ void\ *buf,\ size_t\ len); +\ \ \ \ \ \ void\ *buf,\ size_t\ *len); \f[] .fi .SH ARGUMENTS @@ -365,8 +365,8 @@ The specified address must be of the same format as those stored by the AV, though the address itself is not required to have been inserted. On input, the len parameter should specify the size of the buffer referenced by buf. 
-On output, the actual size needed to write the entire string will be -returned. +On output, len is set to the size of the buffer needed to store the +address. This size may be larger than the input len. If the provided buffer is too small, the results will be truncated. fi_av_straddr returns a pointer to buf. diff --git a/opal/mca/common/libfabric/libfabric/man/fi_av_bind.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_av_bind.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_av_bind.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_av_bind.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_av_insert.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_av_insert.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_av_insert.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_av_insert.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_av_insertsvc.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_av_insertsvc.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_av_insertsvc.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_av_insertsvc.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_av_lookup.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_av_lookup.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_av_lookup.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_av_lookup.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_av_open.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_av_open.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_av_open.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_av_open.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_av_remove.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_av_remove.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_av_remove.3 rename to
opal/mca/common/libfabric/libfabric/man/man3/fi_av_remove.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_av_straddr.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_av_straddr.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_av_straddr.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_av_straddr.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_cancel.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_cancel.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_cancel.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_cancel.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_close.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_close.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_close.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_close.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_cm.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_cm.3 similarity index 75% rename from opal/mca/common/libfabric/libfabric/man/fi_cm.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_cm.3 index ae6faa6b6f..54633c6195 100644 --- a/opal/mca/common/libfabric/libfabric/man/fi_cm.3 +++ b/opal/mca/common/libfabric/libfabric/man/man3/fi_cm.3 @@ -1,4 +1,4 @@ -.TH fi_cm 3 "2015\-01\-01" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc3" +.TH fi_cm 3 "2015\-02\-26" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP fi_cm - Connection management operations @@ -102,29 +102,36 @@ fi_accept as opposed to the listening passive endpoint. Outbound data transfers cannot be initiated on a connection-oriented endpoint until an FI_CONNECTED event has been generated. However, receive buffers may be associated with an endpoint anytime. 
-.PP -For connection-oriented endpoints, the param buffer will be sent as part -of the connection request or response, subject to the constraints of the -underlying connection protocol. -Applications may use fi_control to determine the size of application -data that may be exchanged as part of a connection request or response. -The fi_connect, fi_accept, and fi_reject calls will silently truncate -any application data which cannot fit into underlying protocol messages. .SS fi_shutdown .PP The fi_shutdown call is used to gracefully disconnect an endpoint from its peer. -If shutdown flags are 0, the endpoint is fully disconnected, and no -additional data transfers will be possible. -Flags may also be used to indicate that only outbound (FI_WRITE) or -inbound (FI_READ) data transfers should be disconnected. -Regardless of the shutdown option selected, any queued completions -associated with asynchronous operations may still be retrieved from the -corresponding event queues. +The flags parameter is reserved and must be 0. +.PP +Outstanding operations posted to the endpoint when fi_shutdown is called +will be canceled or discarded. +Notification of canceled operations will be reported by the provider to +the corresponding completion queue(s). +Discarded operations will silently be dropped, with no completions +generated. +The choice of canceling, versus discarding operations, is provider +dependent. +However, all canceled completions will be written before fi_shutdown +returns. +.PP +When called, fi_shutdown does not affect completions already written to +a completion queue. +Any queued completions associated with asynchronous operations posted to +the endpoint may still be retrieved from the corresponding completion +queue(s) after an endpoint has been shutdown. .PP An FI_SHUTDOWN event will be generated for an endpoint when the remote peer issues a disconnect using fi_shutdown or abruptly closes the endpoint. 
+Note that in the abrupt close case, an FI_SHUTDOWN event will only be +generated if the peer system is reachable and a service or kernel agent +on the peer system is able to notify the local endpoint that the +connection has been aborted. .SS fi_getname / fi_getpeer .PP The fi_getname and fi_getpeer calls may be used to retrieve the local or @@ -145,6 +152,19 @@ On error, a negative value corresponding to fabric errno is returned. Fabric errno values are defined in \f[C]rdma/fi_errno.h\f[]. .SH ERRORS .SH NOTES +.PP +For connection-oriented endpoints, the buffer referenced by param will +be sent as part of the connection request or response, subject to the +constraints of the underlying connection protocol. +Applications may use fi_getopt with the FI_OPT_CM_DATA_SIZE endpoint +option to determine the size of application data that may be exchanged +as part of a connection request or response. +The fi_connect, fi_accept, and fi_reject calls will silently truncate +any application data which cannot fit into underlying protocol messages. +User data exchanged as part of the connection process is available as +part of the fi_eq_cm_entry structure, for FI_CONNREQ and FI_CONNECTED +events, or as additional err_data to fi_eq_err_entry, in the case of a +rejected connection. 
.SH SEE ALSO .PP \f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3), diff --git a/opal/mca/common/libfabric/libfabric/man/fi_cntr.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_cntr.3 similarity index 99% rename from opal/mca/common/libfabric/libfabric/man/fi_cntr.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_cntr.3 index bec00b46b7..0129984355 100644 --- a/opal/mca/common/libfabric/libfabric/man/fi_cntr.3 +++ b/opal/mca/common/libfabric/libfabric/man/man3/fi_cntr.3 @@ -1,4 +1,4 @@ -.TH fi_cntr 3 "2015\-01\-29" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc3" +.TH fi_cntr 3 "2015\-01\-29" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP fi_cntr - Completion and event counter operations diff --git a/opal/mca/common/libfabric/libfabric/man/fi_cntr_add.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_cntr_add.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_cntr_add.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_cntr_add.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_cntr_open.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_cntr_open.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_cntr_open.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_cntr_open.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_cntr_read.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_cntr_read.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_cntr_read.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_cntr_read.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_cntr_set.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_cntr_set.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_cntr_set.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_cntr_set.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_cntr_wait.3 
b/opal/mca/common/libfabric/libfabric/man/man3/fi_cntr_wait.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_cntr_wait.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_cntr_wait.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_compare_atomic.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_compare_atomic.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_compare_atomic.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_compare_atomic.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_compare_atomic_valid.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_compare_atomic_valid.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_compare_atomic_valid.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_compare_atomic_valid.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_compare_atomicmsg.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_compare_atomicmsg.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_compare_atomicmsg.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_compare_atomicmsg.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_compare_atomicv.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_compare_atomicv.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_compare_atomicv.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_compare_atomicv.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_connect.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_connect.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_connect.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_connect.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_control.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_control.3 similarity index 97% rename from opal/mca/common/libfabric/libfabric/man/fi_control.3 rename to 
opal/mca/common/libfabric/libfabric/man/man3/fi_control.3 index 391fcb5f18..8487e5b401 100644 --- a/opal/mca/common/libfabric/libfabric/man/fi_control.3 +++ b/opal/mca/common/libfabric/libfabric/man/man3/fi_control.3 @@ -1,4 +1,4 @@ -.TH fi_control 3 "2015\-02\-16" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc3" +.TH fi_control 3 "2015\-02\-16" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP fi_control - Perform an operation on a fabric resource. diff --git a/opal/mca/common/libfabric/libfabric/man/fi_cq.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_cq.3 similarity index 91% rename from opal/mca/common/libfabric/libfabric/man/fi_cq.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_cq.3 index e83ee2450d..dae5f050f8 100644 --- a/opal/mca/common/libfabric/libfabric/man/fi_cq.3 +++ b/opal/mca/common/libfabric/libfabric/man/man3/fi_cq.3 @@ -1,4 +1,4 @@ -.TH fi_cq 3 "2015\-02\-01" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc3" +.TH fi_cq 3 "2015\-02\-23" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP fi_cq - Completion queue operations @@ -411,6 +411,42 @@ Completion flags provide additional details regarding the completed operation. The following completion flags are defined. .PP +*FI_SEND : Indicates that the completion was for a send operation. +This flag may be combined with an FI_MSG or FI_TAGGED flag. +.PP +*FI_RECV : Indicates that the completion was for a receive operation. +This flag may be combined with an FI_MSG or FI_TAGGED flag. +.PP +*FI_RMA : Indicates that an RMA operation completed. +This flag may be combined with an FI_READ, FI_WRITE, FI_REMOTE_READ, or +FI_REMOTE_WRITE flag. +.PP +*FI_ATOMIC : Indicates that an atomic operation completed. +This flag may be combined with an FI_READ, FI_WRITE, FI_REMOTE_READ, or +FI_REMOTE_WRITE flag. +.PP +*FI_MSG : Indicates that a message-based operation completed. +This flag may be combined with an FI_SEND or FI_RECV flag. 
+.PP +*FI_TAGGED : Indicates that a tagged message operation completed. +This flag may be combined with an FI_SEND or FI_RECV flag. +.PP +*FI_READ : Indicates that a locally initiated RMA or atomic read +operation has completed. +This flag may be combined with an FI_RMA or FI_ATOMIC flag. +.PP +*FI_WRITE : Indicates that a locally initiated RMA or atomic write +operation has completed. +This flag may be combined with an FI_RMA or FI_ATOMIC flag. +.PP +*FI_REMOTE_READ : Indicates that a remotely initiated RMA or atomic read +operation has completed. +This flag may be combined with an FI_RMA or FI_ATOMIC flag. +.PP +*FI_REMOTE_WRITE : Indicates that a remotely initiated RMA or atomic +write operation has completed. +This flag may be combined with an FI_RMA or FI_ATOMIC flag. +.PP *FI_REMOTE_CQ_DATA : This indicates that remote CQ data is available as part of the completion. .PP diff --git a/opal/mca/common/libfabric/libfabric/man/fi_cq_open.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_cq_open.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_cq_open.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_cq_open.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_cq_read.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_cq_read.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_cq_read.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_cq_read.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_cq_readerr.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_cq_readerr.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_cq_readerr.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_cq_readerr.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_cq_readfrom.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_cq_readfrom.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_cq_readfrom.3 rename to
opal/mca/common/libfabric/libfabric/man/man3/fi_cq_readfrom.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_cq_sread.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_cq_sread.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_cq_sread.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_cq_sread.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_cq_sreadfrom.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_cq_sreadfrom.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_cq_sreadfrom.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_cq_sreadfrom.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_cq_strerror.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_cq_strerror.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_cq_strerror.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_cq_strerror.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_cq_write.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_cq_write.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_cq_write.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_cq_write.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_domain.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_domain.3 similarity index 92% rename from opal/mca/common/libfabric/libfabric/man/fi_domain.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_domain.3 index c2454dc44c..dbda192fb2 100644 --- a/opal/mca/common/libfabric/libfabric/man/fi_domain.3 +++ b/opal/mca/common/libfabric/libfabric/man/man3/fi_domain.3 @@ -1,4 +1,4 @@ -.TH fi_domain 3 "2015\-02\-01" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc3" +.TH fi_domain 3 "2015\-02\-28" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP fi_domain - Open a fabric access domain @@ -97,6 +97,7 @@ struct\ fi_domain_attr\ { \ \ \ \ enum\ fi_progress\ \ \ \ \ \ 
control_progress; \ \ \ \ enum\ fi_progress\ \ \ \ \ \ data_progress; \ \ \ \ enum\ fi_resource_mgmt\ resource_mgmt; +\ \ \ \ enum\ fi_av_type\ \ \ \ \ \ \ av_type; \ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ mr_key_size; \ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ cq_data_size; \ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ cq_cnt; @@ -360,16 +361,46 @@ For unconnected endpoints, the endpoint must be re-enabled before it will accept new data transfer operations. For connected endpoints, the connection is torn down and must be re-established. -.SS MR Key Size +.SS AV Type (av_type) +.PP +Specifies the type of address vectors that are usable with this domain. +For additional details on AV type, see \f[C]fi_av\f[](3). +The following values may be specified. +.PP +\f[I]FI_AV_UNSPEC\f[] : Any address vector format is requested and +supported. +.PP +\f[I]FI_AV_MAP\f[] : Only address vectors of type AV map are requested +or supported. +.PP +\f[I]FI_AV_TABLE\f[] : Only address vectors of type AV index are +requested or supported. +.PP +Address vectors are only used by connectionless endpoints. +Applications that require the use of a specific type of address vector +should set the domain attribute av_type to the necessary value when +calling fi_getinfo. +The value FI_AV_UNSPEC may be used to indicate that the provider can +support either address vector format. +In this case, a provider may return FI_AV_UNSPEC to indicate that either +format is supportable, or may return another AV type to indicate the +optimal AV type supported by this domain. +.SS MR Key Size (mr_key_size) .PP Size of the memory region remote access key, in bytes. Applications that request their own MR key must select a value within the range specified by this value. -.SS CQ Data Size +.SS CQ Data Size (cq_data_size) .PP -The number of bytes that the provider supports for remote CQ data. -See the FI_REMOTE_CQ_DATA flag (fi_getinfo) for the use of remote CQ -data. 
+Applications may include a small message with a data transfer that is +placed directly into a remote completion queue as part of a completion +event. +This is referred to as remote CQ data (sometimes referred to as +immediate data). +This field indicates the number of bytes that the provider supports for +remote CQ data. +If supported (non-zero value is returned), the minimum size of remote CQ +data must be at least 4-bytes. .SS Completion Queue Count (cq_cnt) .PP The total number of completion queues supported by the domain, relative diff --git a/opal/mca/common/libfabric/libfabric/man/fi_domain_bind.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_domain_bind.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_domain_bind.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_domain_bind.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_domain_query.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_domain_query.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_domain_query.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_domain_query.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_dupinfo.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_dupinfo.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_dupinfo.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_dupinfo.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_enable.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_enable.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_enable.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_enable.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_endpoint.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_endpoint.3 similarity index 94% rename from opal/mca/common/libfabric/libfabric/man/fi_endpoint.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_endpoint.3 index 
4a6e0a97e1..829f77f0ef 100644 --- a/opal/mca/common/libfabric/libfabric/man/fi_endpoint.3 +++ b/opal/mca/common/libfabric/libfabric/man/man3/fi_endpoint.3 @@ -1,4 +1,4 @@ -.TH fi_endpoint 3 "2015\-02\-12" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc3" +.TH fi_endpoint 3 "2015\-02\-27" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP fi_endpoint - Fabric endpoint operations @@ -222,6 +222,14 @@ When closing a scalable endpoint, there must be no opened transmit contexts, or receive contexts associated with the scalable endpoint. If resources are still associated with the scalable endpoint when attempting to close, the call will return -FI_EBUSY. +.PP +Outstanding operations posted to the endpoint when fi_close is called +will be discarded. +Discarded operations will silently be dropped, with no completions +reported. +Additionally, a provider may discard previously completed operations +from the associated completion queue(s). +The behavior to discard completed operations is provider specific. .SS fi_ep_bind .PP fi_ep_bind is used to associate an endpoint with hardware resources. @@ -415,6 +423,13 @@ needed on receives posted after the value has been changed. It is recommended that applications that want to override the default MIN_MULTI_RECV value set this option before enabling the corresponding endpoint. +.IP \[bu] 2 +\f[I]FI_OPT_CM_DATA_SIZE - size_t\f[] : Defines the size of available +space in CM messages for user-defined data. +This value limits the amount of data that applications can exchange +between peer endpoints using the fi_connect, fi_accept, and fi_reject +operations. +This option is read only. .SS fi_rx_size_left .PP The fi_rx_size_left call returns a lower bound on the number of receive @@ -443,23 +458,44 @@ an endpoint. 
.nf \f[C] struct\ fi_ep_attr\ { -\ \ \ \ uint32_t\ \ protocol; -\ \ \ \ uint32_t\ \ protocol_version; -\ \ \ \ size_t\ \ \ \ max_msg_size; -\ \ \ \ size_t\ \ \ \ inject_size; -\ \ \ \ size_t\ \ \ \ total_buffered_recv; -\ \ \ \ size_t\ \ \ \ msg_prefix_size; -\ \ \ \ size_t\ \ \ \ max_order_raw_size; -\ \ \ \ size_t\ \ \ \ max_order_war_size; -\ \ \ \ size_t\ \ \ \ max_order_waw_size; -\ \ \ \ uint64_t\ \ mem_tag_format; -\ \ \ \ uint64_t\ \ msg_order; -\ \ \ \ uint64_t\ \ comp_order; -\ \ \ \ size_t\ \ \ \ tx_ctx_cnt; -\ \ \ \ size_t\ \ \ \ rx_ctx_cnt; +\ \ \ \ enum\ fi_ep_type\ type; +\ \ \ \ uint32_t\ \ \ \ \ \ \ \ protocol; +\ \ \ \ uint32_t\ \ \ \ \ \ \ \ protocol_version; +\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ max_msg_size; +\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ msg_prefix_size; +\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ max_order_raw_size; +\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ max_order_war_size; +\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ max_order_waw_size; +\ \ \ \ uint64_t\ \ \ \ \ \ \ \ mem_tag_format; +\ \ \ \ uint64_t\ \ \ \ \ \ \ \ msg_order; +\ \ \ \ uint64_t\ \ \ \ \ \ \ \ comp_order; +\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ tx_ctx_cnt; +\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ rx_ctx_cnt; }; \f[] .fi +.SS type - Endpoint Type +.PP +If specified, indicates the type of fabric interface communication +desired. +Supported types are: +.PP +\f[I]FI_EP_UNSPEC\f[] : The type of endpoint is not specified. +This is usually provided as input, with other attributes of the endpoint +or the provider selecting the type. +.PP +\f[I]FI_EP_MSG\f[] : Provides a reliable, connection-oriented data +transfer service with flow control that maintains message boundaries. +.PP +\f[I]FI_EP_DGRAM\f[] : Supports a connectionless, unreliable datagram +communication. +Message boundaries are maintained, but the maximum message size may be +limited to the fabric MTU. +Flow control is not guaranteed. +.PP +\f[I]FI_EP_RDM\f[] : Reliable datagram message.
+Provides a reliable, unconnected data transfer service with flow control +that maintains message boundaries. .SS Protocol .PP Specifies the low-level end to end protocol employed by the provider. @@ -508,15 +544,6 @@ lesser version. .PP Defines the maximum size for an application data transfer as a single operation. -.SS inject_size - Inject Size -.PP -Defines the default inject operation size (see the FI_INJECT flag) that -an endpoint will support. -This value applies per send operation. -.SS total_buffered_recv - Total Buffered Receive -.PP -Defines the total available space allocated by the provider to buffer -received messages (see the FI_BUFFERED_RECV flag). .SS msg_prefix_size - Message Prefix Size .PP Specifies the size of any required message prefix buffer space. @@ -848,8 +875,6 @@ See the fi_endpoint Completion Ordering section. .PP \f[I]inject_size\f[] : The requested inject operation size (see the FI_INJECT flag) that the context will support. -This value must be equal to or less than the inject_size of the -associated endpoint. See the fi_endpoint Inject Size section. .PP \f[I]size\f[] : The size of the context, in bytes. @@ -946,10 +971,10 @@ specified of the associated endpoint. See the fi_endpoint Completion Ordering section. .PP \f[I]total_buffered_recv\f[] : Defines the total available space -allocated by the provider to buffer received messages on the context. -This value must be less than or equal to that specified for the -associated endpoint. -See the fi_endpoint Total Buffered Receive section. +allocated by the provider to buffer messages that are received for which +there is no matching receive operation. +If set to 0, any messages that arrive before a receive buffer has been +posted are lost. .PP \f[I]size\f[] : The size of the context, in bytes. The size is usually used as an output value by applications wishing to @@ -1024,15 +1049,13 @@ data transfer operations, where a flags parameter is not available. 
Data transfer operations that take flags as input override the op_flags value of an endpoint. .PP -\f[I]FI_INJECT\f[] : Indicates that all outbound data buffer should be +\f[I]FI_INJECT\f[] : Indicates that all outbound data buffers should be returned to the user\[aq]s control immediately after a data transfer call returns, even if the operation is handled asynchronously. This may require that the provider copy the data into a local buffer and transfer out of that buffer. A provider may limit the total amount of send data that may be buffered and/or the size of a single send. -Applications may discover and modify these limits using the -endpoint\[aq]s getopt and setopt interfaces. .PP \f[I]FI_MULTI_RECV\f[] : Applies to posted receive operations. This flag allows the user to post a single buffer that will receive diff --git a/opal/mca/common/libfabric/libfabric/man/fi_ep_bind.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_ep_bind.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_ep_bind.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_ep_bind.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_eq.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_eq.3 similarity index 88% rename from opal/mca/common/libfabric/libfabric/man/fi_eq.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_eq.3 index 714eb28472..7ea998e170 100644 --- a/opal/mca/common/libfabric/libfabric/man/fi_eq.3 +++ b/opal/mca/common/libfabric/libfabric/man/man3/fi_eq.3 @@ -1,4 +1,4 @@ -.TH fi_eq 3 "2015\-02\-01" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc3" +.TH fi_eq 3 "2015\-02\-23" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP fi_eq - Event queue operations @@ -215,8 +215,8 @@ information regarding the format associated with each event. \f[I]Asynchronous Control Operations\f[] : Asynchronous control operations are basic requests that simply need to generate an event to indicate that they have completed. 
-These include the following types of events: memory registration, -address vector resolution, and connection established. +These include the following types of events: memory registration and +address vector resolution. .PP Control requests report their completion by inserting a \f[C]struct\ \ \ fi_eq_entry\f[] into the EQ. @@ -236,16 +236,18 @@ For the completion of basic asynchronous control operations, the returned event will indicate the operation that has completed, and the fid will reference the fabric descriptor associated with the event. For memory registration, this will be an FI_MR_COMPLETE event and the -fid_mr, address resolution will reference an FI_AV_COMPLETE event and -fid_av, and CM events will refer to a FI_CONNECTED event and fid_ep. +fid_mr; address resolution will reference an FI_AV_COMPLETE event and +fid_av. The context field will be set to the context specified as part of the operation, if available, otherwise the context will be associated with the fabric descriptor. .PP -\f[I]Connection Request Notification\f[] : Connection requests are -unsolicited notifications that a remote endpoint wishes to establish a -new connection to a listening passive endpoint. -Connection requests are reported using +\f[I]Connection Notification\f[] : Connection notifications are +connection management notifications used to setup or teardown +connections between endpoints. +There are three connection notification events: FI_CONNREQ, +FI_CONNECTED, and FI_SHUTDOWN. +Connection notifications are reported using \f[C]struct\ \ \ fi_eq_cm_entry\f[]: .IP .nf @@ -258,10 +260,12 @@ struct\ fi_eq_cm_entry\ { \f[] .fi .PP -Connection request events are of type FI_CONNREQ. +A connection request (FI_CONNREQ) event indicates that a remote endpoint +wishes to establish a new connection to a listening, or passive, +endpoint. The fid is the passive endpoint. -Information regarding the requested endpoint\[aq]s capabilities and -attributes are available from the info field. 
+Information regarding the requested, active endpoint\[aq]s capabilities +and attributes are available from the info field. The application is responsible for freeing this structure by calling fi_freeinfo when it is no longer needed. The fi_info connreq field will reference the connection request @@ -287,13 +291,20 @@ protocol padding. As a result, the returned length may be larger than that specified by the connecting peer. .PP -\f[I]Connection Shutdown Notification\f[] : Notification that a remote -peer has disconnected from an active endpoint is done through the -FI_SHUTDOWN event. -Shutdown notification uses struct fi_eq_entry as declared above. +If a connection request has been accepted, an FI_CONNECTED event will be +generated on both sides of the connection. +The active side -- one that called fi_connect() -- may receive user data +as part of the FI_CONNECTED event. +The user data is passed to the connection manager on the passive side +through the fi_accept call. +User data is not provided with an FI_CONNECTED event on the listening +side of the connection. +.PP +Notification that a remote peer has disconnected from an active endpoint +is done through the FI_SHUTDOWN event. +Shutdown notification uses struct fi_eq_cm_entry as declared above. The fid field for a shutdown notification refers to the active endpoint\[aq]s fid_ep. -The context field is set to NULL. .SS fi_eq_sread .PP The fi_eq_sread call is the blocking (or synchronous) equivalent to @@ -333,6 +344,7 @@ struct\ fi_eq_err_entry\ { \ \ \ \ int\ \ \ \ \ \ \ \ \ \ \ \ \ \ err;\ \ \ \ \ \ \ \ /*\ positive\ error\ code\ */ \ \ \ \ int\ \ \ \ \ \ \ \ \ \ \ \ \ \ prov_errno;\ /*\ provider\ error\ code\ */ \ \ \ \ void\ \ \ \ \ \ \ \ \ \ \ \ *err_data;\ \ \ /*\ additional\ error\ data\ */ +\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ err_data_size;\ /*\ size\ of\ err_data\ */ }; \f[] .fi @@ -344,10 +356,15 @@ The context field will be set to the context specified as part of the operation. 
.PP The general reason for the error is provided through the err field. -Provider specific error information may also be available through the -prov_errno and err_data fields. +Provider or operational specific error information may also be available +through the prov_errno and err_data fields. Users may call fi_eq_strerror to convert provider specific error information into a printable string for debugging purposes. +.PP +If err_data_size is > 0, then the buffer referenced by err_data is +directly user-accessible. +Applications which read the err_data buffer must ensure that they do not +read past the end of the referenced buffer. .SH RETURN VALUES .PP fi_eq_open : Returns 0 on success. diff --git a/opal/mca/common/libfabric/libfabric/man/fi_eq_open.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_eq_open.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_eq_open.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_eq_open.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_eq_read.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_eq_read.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_eq_read.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_eq_read.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_eq_readerr.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_eq_readerr.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_eq_readerr.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_eq_readerr.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_eq_sread.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_eq_sread.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_eq_sread.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_eq_sread.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_eq_strerror.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_eq_strerror.3 similarity index 100% rename 
from opal/mca/common/libfabric/libfabric/man/fi_eq_strerror.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_eq_strerror.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_eq_write.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_eq_write.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_eq_write.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_eq_write.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_errno.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_errno.3 similarity index 99% rename from opal/mca/common/libfabric/libfabric/man/fi_errno.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_errno.3 index 64b0f1c489..4113e6ca9b 100644 --- a/opal/mca/common/libfabric/libfabric/man/fi_errno.3 +++ b/opal/mca/common/libfabric/libfabric/man/man3/fi_errno.3 @@ -1,4 +1,4 @@ -.TH fi_errno 3 "2015\-01\-08" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc3" +.TH fi_errno 3 "2015\-01\-08" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP fi_errno - fabric errors diff --git a/opal/mca/common/libfabric/libfabric/man/fi_fabric.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_fabric.3 similarity index 99% rename from opal/mca/common/libfabric/libfabric/man/fi_fabric.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_fabric.3 index 17d629851d..d20edeee49 100644 --- a/opal/mca/common/libfabric/libfabric/man/fi_fabric.3 +++ b/opal/mca/common/libfabric/libfabric/man/man3/fi_fabric.3 @@ -1,4 +1,4 @@ -.TH fi_fabric 3 "2015\-01\-24" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc3" +.TH fi_fabric 3 "2015\-01\-24" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP fi_fabric - Fabric domain operations diff --git a/opal/mca/common/libfabric/libfabric/man/fi_fetch_atomic.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_fetch_atomic.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_fetch_atomic.3 rename to 
opal/mca/common/libfabric/libfabric/man/man3/fi_fetch_atomic.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_fetch_atomic_valid.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_fetch_atomic_valid.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_fetch_atomic_valid.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_fetch_atomic_valid.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_fetch_atomicmsg.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_fetch_atomicmsg.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_fetch_atomicmsg.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_fetch_atomicmsg.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_fetch_atomicv.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_fetch_atomicv.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_fetch_atomicv.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_fetch_atomicv.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_freeinfo.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_freeinfo.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_freeinfo.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_freeinfo.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_getinfo.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_getinfo.3 similarity index 87% rename from opal/mca/common/libfabric/libfabric/man/fi_getinfo.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_getinfo.3 index 709638fadc..b6f6bb7df6 100644 --- a/opal/mca/common/libfabric/libfabric/man/fi_getinfo.3 +++ b/opal/mca/common/libfabric/libfabric/man/man3/fi_getinfo.3 @@ -1,7 +1,9 @@ -.TH fi_getinfo 3 "2015\-02\-11" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc3" +.TH fi_getinfo 3 "2015\-02\-28" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP fi_getinfo / fi_freeinfo - Obtain / free fabric interface 
information +.PP +fi_allocinfo / fi_dupinfo - Allocate / duplicate an fi_info structure .SH SYNOPSIS .IP .nf @@ -13,6 +15,8 @@ int\ fi_getinfo(int\ version,\ const\ char\ *node,\ const\ char\ *service, void\ fi_freeinfo(struct\ fi_info\ *info); +struct\ fi_info\ *fi_allocinfo(void); + struct\ fi_info\ *fi_dupinfo(const\ struct\ fi_info\ *info); \f[] .fi @@ -33,12 +37,11 @@ criteria for selecting the returned fabric information. containing response information. .SH DESCRIPTION .PP -Returns information about available fabric services for reaching the -specified node or service, subject to any provided hints. -Callers must provide at least one of the node, service, or hints -parameters. -If node and service are NULL, then the hints src_addr and/or dest_addr -fields of the fi_info structure must be specified. +fi_getinfo returns information about available fabric services for +reaching the specified node or service, subject to any provided hints. +Callers may specify NULL for node, service, and hints in order to +retrieve information about what providers are available and their +optimal usage models. If no matching fabric information is available, info will be set to NULL. .PP @@ -52,10 +55,10 @@ additional criteria in their search hints. Relaxing or eliminating input hints will increase the number and type of endpoints that are available. Providers that return multiple endpoints to a single fi_getinfo call -should return the endpoints that are highest performing. +should return the endpoints that are highest performing first. Providers may indicate that an endpoint and domain can support additional capabilities than those requested by the user only if such -support will not adversely affect performance. +support will not adversely affect application performance or security. .PP The version parameter is used by the application to request the desired version of the interfaces. 
@@ -73,8 +76,8 @@ This protects against the application being built from source against a newer version of the library that introduces new fields to data structures, which would not be initialized by the application. .PP -Either node, service, or hints must be provided, with any combination -being supported. +Node, service, or hints may be provided, with any combination being +supported. If node is provided, fi_getinfo will attempt to resolve the fabric address to the given node. The hints parameter, if provided, may be used to control the resulting @@ -84,6 +87,11 @@ addressing information based on the provided hints. .PP The caller must call fi_freeinfo to release fi_info structures returned by this call. +.PP +The fi_allocinfo call will allocate and zero an fi_info structure and +all related substructures. +The fi_dupinfo will duplicate a single fi_info structure and all the +substructures within it. .SH FI_INFO .IP .nf @@ -92,7 +100,6 @@ struct\ fi_info\ { \ \ \ \ struct\ fi_info\ \ \ \ \ \ \ \ *next; \ \ \ \ uint64_t\ \ \ \ \ \ \ \ \ \ \ \ \ \ caps; \ \ \ \ uint64_t\ \ \ \ \ \ \ \ \ \ \ \ \ \ mode; -\ \ \ \ enum\ fi_ep_type\ \ \ \ \ \ \ ep_type; \ \ \ \ uint32_t\ \ \ \ \ \ \ \ \ \ \ \ \ \ addr_format; \ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ src_addrlen; \ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ dest_addrlen; @@ -119,10 +126,6 @@ below. \f[I]mode\f[] : Operational modes supported by the application. See the \f[I]Mode\f[] section below. .PP -\f[I]ep_type - endpoint type\f[] : If specified, indicates the type of -fabric interface communication desired. -Supported types are listed in the \f[I]Endpoint types\f[] section below. -.PP \f[I]addr_format - address format\f[] : If specified, indicates the format of addresses referenced by the fabric interfaces and data structures. @@ -130,15 +133,17 @@ Supported formats are listed in the \f[I]Addressing formats\f[] section below. 
.PP \f[I]src_addrlen - source address length\f[] : Indicates the length of -the source address (must be specified if \f[I]src_addr\f[] is -specified). -This field will be ignored in hints if FI_SOURCE is specified. +the source address. +This value must be > 0 if \f[I]src_addr\f[] is non-NULL. +This field will be ignored in hints if FI_SOURCE is specified, or +\f[I]src_addr\f[] is NULL. .PP \f[I]dest_addrlen - destination address length\f[] : Indicates the -length of the destination address (must be specified if -\f[I]dest_addr\f[] is specified). +length of the destination address. +This value must be > 0 if \f[I]dest_addr\f[] is non-NULL. This field will be ignored in hints unless the node and service -parameters are NULL or FI_SOURCE is specified. +parameters are NULL or FI_SOURCE is specified, or if \f[I]dest_addr\f[] +is NULL. .PP \f[I]src_addr - source address\f[] : If specified, indicates the source address. @@ -210,7 +215,7 @@ Interface capabilities are obtained by OR-ing the following flags together. If capabilities in the hint parameter are set to 0, the underlying provider will return the set of capabilities which are supported. -Otherwise, providers will only return data matching the specified set of +Otherwise, providers will return data matching the specified set of capabilities. Providers may indicate support for additional capabilities beyond those requested when the use of expanded capabilities will not adversely @@ -275,28 +280,12 @@ regions be backed by allocated memory pages. multiple receive contexts allow an initiator to target (or name) a specific receive context as part of a data transfer operation. .PP -\f[I]FI_BUFFERED_RECV\f[] : Requests that the communication endpoint -should attempt to queue inbound data that arrives before a receive -buffer has been posted. -In the absence of this flag, any messages that arrive before a receive -is posted are lost. 
-Applications may access endpoint options (getopt/setopt) to determine -the size of available buffered receive space. -.PP \f[I]FI_DIRECTED_RECV\f[] : Requests that the communication endpoint use the source address of an incoming message when matching it with a receive buffer. If this capability is not set, then the src_addr parameter for msg and tagged receive operations is ignored. .PP -\f[I]FI_INJECT\f[] : Indicates that the endpoint be able to support the -FI_INJECT flag on data transfer operations and the \[aq]inject\[aq] data -transfer calls. -The minimum supported size of an inject operation that an endpoint with -this capability must support is 8-bytes. -Applications may access endpoint options (getopt/setopt) to determine -injected transfer limits. -.PP \f[I]FI_MULTI_RECV\f[] : Specifies that the endpoint must support the FI_MULTI_RECV flag when posting receive buffers. .PP @@ -334,18 +323,6 @@ This flag requires that FI_RMA and/or FI_ATOMIC be set. capable of receiving write memory operations from remote endpoints. This flag requires that FI_RMA and/or FI_ATOMIC be set. .PP -\f[I]FI_REMOTE_CQ_DATA\f[] : Applications may include a small message -with a data transfer that is placed directly into a remote event queue -as part of a completion event. -This is referred to as remote CQ data (sometimes referred to as -immediate data). -The FI_REMOTE_CQ_DATA indicates that an endpoint must support the -FI_REMOTE_CQ_DATA flag on data transfer operations. -The minimum supported size of remote CQ data that an endpoint with this -capability must support is 4-bytes. -Applications may check the domain attributes to determine remote CQ data -limits. -.PP \f[I]FI_REMOTE_SIGNAL\f[] : Indicates that the endpoint support the FI_REMOTE_SIGNAL flag on data transfer operations. Support requires marking outbound data transfers as signaled and @@ -374,6 +351,24 @@ operation. 
Fenced operations are often used to enforce ordering between operations that are not otherwise guaranteed by the underlying provider or protocol. +.PP +Capabilities may be grouped into two general categories: primary and +secondary. +Primary capabilities must explicitly be requested by an application, and +a provider must enable support for only those primary capabilities which +were selected. +Secondary capabilities may optionally be requested by an application. +If requested, a provider must support the capability or fail the +fi_getinfo request (FI_ENOSYS). +A provider may optionally report non-selected secondary capabilities if +doing so would not compromise performance or security. +.PP +Primary capabilities: FI_MSG, FI_RMA, FI_TAGGED, FI_ATOMIC, +FI_NAMED_RX_CTX, FI_DIRECTED_RECV, FI_READ, FI_WRITE, FI_RECV, FI_SEND, +FI_REMOTE_READ, and FI_REMOTE_WRITE. +.PP +Secondary capabilities: FI_DYNAMIC_MR, FI_MULTI_RECV, FI_SOURCE, +FI_CANCEL, FI_FENCE, FI_REMOTE_COMPLETE. .SH MODE .PP The operational mode bits are used to convey requirements that an @@ -473,24 +468,6 @@ The FI_ASYNC_IOV mode indicates that the application must provide the buffering needed for the IO vectors. When set, an application must not modify an IO vector until the associated operation has completed. -.SH ENDPOINT TYPES -.PP -\f[I]FI_EP_UNSPEC\f[] : The type of endpoint is not specified. -This is usually provided as input, with other attributes of the endpoint -or the provider selecting the type. -.PP -\f[I]FI_EP_MSG\f[] : Provides a reliable, connection-oriented data -transfer service with flow control that maintains message boundaries. -.PP -\f[I]FI_EP_DGRAM\f[] : Supports a connectionless, unreliable datagram -communication. -Message boundaries are maintained, but the maximum message size may be -limited to the fabric MTU. -Flow control is not guaranteed. -.PP -\f[I]FI_EP_RDM\f[] : Reliable datagram message. 
-Provides a reliable, unconnected data transfer service with flow control -that maintains message boundaries. .SH ADDRESSING FORMATS .PP Multiple fabric interfaces take as input either a source or destination @@ -552,11 +529,13 @@ On error, fi_getinfo() returns a negative value corresponding to fabric errno. Fabric errno values are defined in \f[C]rdma/fi_errno.h\f[]. .PP +fi_allocinfo() returns a pointer to a new fi_info structure on success, +or NULL on error. fi_dupinfo() duplicates a single fi_info structure and all the -substructures within it and returns a pointer to the new fi_info -structure. -This new fi_info structure must be freed via fi_freeinfo(). -fi_dupinfo() returns NULL on error. +substructures within it, returning a pointer to the new fi_info +structure on success, or NULL on error. +Both calls require that the returned fi_info structure be freed via +fi_freeinfo(). .SH ERRORS .PP \f[I]FI_EBADFLAGS\f[] : The specified endpoint or domain capability or diff --git a/opal/mca/common/libfabric/libfabric/man/fi_getname.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_getname.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_getname.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_getname.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_getopt.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_getopt.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_getopt.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_getopt.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_getpeer.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_getpeer.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_getpeer.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_getpeer.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_inject.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_inject.3 similarity index 100% rename from 
opal/mca/common/libfabric/libfabric/man/fi_inject.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_inject.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_inject_atomic.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_inject_atomic.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_inject_atomic.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_inject_atomic.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_inject_write.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_inject_write.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_inject_write.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_inject_write.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_inject_writedata.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_inject_writedata.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_inject_writedata.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_inject_writedata.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_injectdata.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_injectdata.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_injectdata.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_injectdata.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_join.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_join.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_join.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_join.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_leave.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_leave.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_leave.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_leave.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_listen.3 
b/opal/mca/common/libfabric/libfabric/man/man3/fi_listen.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_listen.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_listen.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_mr.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_mr.3 similarity index 99% rename from opal/mca/common/libfabric/libfabric/man/fi_mr.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_mr.3 index f7ffc471ea..d80659107f 100644 --- a/opal/mca/common/libfabric/libfabric/man/fi_mr.3 +++ b/opal/mca/common/libfabric/libfabric/man/man3/fi_mr.3 @@ -1,4 +1,4 @@ -.TH fi_mr 3 "2015\-01\-29" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc3" +.TH fi_mr 3 "2015\-01\-29" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP fi_mr - Memory region operations diff --git a/opal/mca/common/libfabric/libfabric/man/fi_mr_bind.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_mr_bind.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_mr_bind.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_mr_bind.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_mr_desc.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_mr_desc.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_mr_desc.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_mr_desc.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_mr_key.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_mr_key.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_mr_key.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_mr_key.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_mr_reg.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_mr_reg.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_mr_reg.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_mr_reg.3 diff --git 
a/opal/mca/common/libfabric/libfabric/man/fi_mr_regattr.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_mr_regattr.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_mr_regattr.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_mr_regattr.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_mr_regv.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_mr_regv.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_mr_regv.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_mr_regv.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_msg.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_msg.3 similarity index 98% rename from opal/mca/common/libfabric/libfabric/man/fi_msg.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_msg.3 index d7f79ee6e9..0b6b8e8324 100644 --- a/opal/mca/common/libfabric/libfabric/man/fi_msg.3 +++ b/opal/mca/common/libfabric/libfabric/man/man3/fi_msg.3 @@ -1,4 +1,4 @@ -.TH fi_msg 3 "2015\-02\-06" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc3" +.TH fi_msg 3 "2015\-03\-04" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP fi_msg - Message data transfer operations @@ -101,7 +101,7 @@ Completed message operations are reported to the user through one or more event collectors associated with the endpoint. Users provide context which are associated with each operation, and is returned to the user as part of the event completion. -See fi_eq for completion event details. +See fi_cq for completion event details. .SS fi_send .PP The call fi_send transfers the data contained in the user-specified data @@ -248,6 +248,6 @@ operations. .SH SEE ALSO .PP \f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3), -\f[C]fi_eq\f[](3) +\f[C]fi_cq\f[](3) .SH AUTHORS OpenFabrics. 
diff --git a/opal/mca/common/libfabric/libfabric/man/fi_open.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_open.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_open.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_open.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_passive_ep.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_passive_ep.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_passive_ep.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_passive_ep.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_poll.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_poll.3 similarity index 99% rename from opal/mca/common/libfabric/libfabric/man/fi_poll.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_poll.3 index 766a00a94b..0fd3c1b66a 100644 --- a/opal/mca/common/libfabric/libfabric/man/fi_poll.3 +++ b/opal/mca/common/libfabric/libfabric/man/man3/fi_poll.3 @@ -1,4 +1,4 @@ -.TH fi_poll 3 "2015\-01\-29" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc3" +.TH fi_poll 3 "2015\-01\-29" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP fi_poll - Polling and wait set operations diff --git a/opal/mca/common/libfabric/libfabric/man/fi_poll_add.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_poll_add.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_poll_add.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_poll_add.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_poll_del.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_poll_del.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_poll_del.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_poll_del.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_poll_open.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_poll_open.3 similarity index 100% rename from 
opal/mca/common/libfabric/libfabric/man/fi_poll_open.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_poll_open.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_read.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_read.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_read.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_read.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_readmsg.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_readmsg.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_readmsg.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_readmsg.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_readv.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_readv.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_readv.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_readv.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_recv.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_recv.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_recv.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_recv.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_recvmsg.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_recvmsg.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_recvmsg.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_recvmsg.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_recvv.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_recvv.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_recvv.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_recvv.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_reject.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_reject.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_reject.3 rename to 
opal/mca/common/libfabric/libfabric/man/man3/fi_reject.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_rma.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_rma.3 similarity index 98% rename from opal/mca/common/libfabric/libfabric/man/fi_rma.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_rma.3 index fd71cc9c38..bde460d340 100644 --- a/opal/mca/common/libfabric/libfabric/man/fi_rma.3 +++ b/opal/mca/common/libfabric/libfabric/man/man3/fi_rma.3 @@ -1,4 +1,4 @@ -.TH fi_rma 3 "2015\-01\-28" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc3" +.TH fi_rma 3 "2015\-03\-04" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP fi_rma - Remote memory access operations @@ -252,6 +252,6 @@ operations. .SH SEE ALSO .PP \f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3), -\f[C]fi_eq\f[](3) +\f[C]fi_cq\f[](3) .SH AUTHORS OpenFabrics. diff --git a/opal/mca/common/libfabric/libfabric/man/fi_rx_addr.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_rx_addr.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_rx_addr.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_rx_addr.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_rx_size_left.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_rx_size_left.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_rx_size_left.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_rx_size_left.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_send.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_send.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_send.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_send.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_senddata.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_senddata.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_senddata.3 rename to 
opal/mca/common/libfabric/libfabric/man/man3/fi_senddata.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_sendmsg.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_sendmsg.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_sendmsg.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_sendmsg.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_sendv.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_sendv.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_sendv.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_sendv.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_setopt.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_setopt.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_setopt.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_setopt.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_shutdown.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_shutdown.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_shutdown.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_shutdown.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_strerror.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_strerror.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_strerror.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_strerror.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_tagged.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_tagged.3 similarity index 97% rename from opal/mca/common/libfabric/libfabric/man/fi_tagged.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_tagged.3 index 55f507c512..39cac9b30f 100644 --- a/opal/mca/common/libfabric/libfabric/man/fi_tagged.3 +++ b/opal/mca/common/libfabric/libfabric/man/man3/fi_tagged.3 @@ -1,4 +1,4 @@ -.TH fi_tagged 3 "2015\-02\-18" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc3" 
+.TH fi_tagged 3 "2015\-03\-04" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP fi_tagged - Tagged data transfer operations @@ -133,7 +133,7 @@ Completed message operations are reported to the user through one or more event collectors associated with the endpoint. Users provide context which are associated with each operation, and is returned to the user as part of the event completion. -See fi_eq for completion event details. +See fi_cq for completion event details. .SS fi_tsend .PP The call fi_tsend transfers the data contained in the user-specified @@ -211,8 +211,8 @@ The fi_trecvmsg function takes a struct fi_msg_tagged as input. The function fi_tsearch determines if a message with the specified tag with ignore mask from an optionally supplied source address has been received and is buffered by the provider. -The fi_tsearch call is only available on endpoints with FI_BUFFERED_RECV -enabled. +The fi_tsearch call is only available on endpoints with provider +allocated buffering enabled (see fi_rx_attr total_buffered_recv). The fi_tsearch operation may complete asynchronously or immediately, depending on the underlying provider implementation. .PP @@ -317,6 +317,6 @@ the user. .SH SEE ALSO .PP \f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3), -\f[C]fi_eq\f[](3) +\f[C]fi_cq\f[](3) .SH AUTHORS OpenFabrics. 
diff --git a/opal/mca/common/libfabric/libfabric/man/fi_tinject.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_tinject.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_tinject.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_tinject.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_tinjectdata.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_tinjectdata.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_tinjectdata.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_tinjectdata.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_tostr.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_tostr.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_tostr.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_tostr.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_trecv.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_trecv.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_trecv.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_trecv.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_trecvmsg.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_trecvmsg.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_trecvmsg.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_trecvmsg.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_trecvv.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_trecvv.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_trecvv.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_trecvv.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_trigger.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_trigger.3 similarity index 99% rename from opal/mca/common/libfabric/libfabric/man/fi_trigger.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_trigger.3 index c525a28c8c..caa3570a59 
100644 --- a/opal/mca/common/libfabric/libfabric/man/fi_trigger.3 +++ b/opal/mca/common/libfabric/libfabric/man/man3/fi_trigger.3 @@ -1,4 +1,4 @@ -.TH fi_trigger 3 "2015\-01\-01" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc3" +.TH fi_trigger 3 "2015\-01\-01" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP fi_trigger - Triggered operations diff --git a/opal/mca/common/libfabric/libfabric/man/fi_tsearch.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_tsearch.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_tsearch.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_tsearch.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_tsend.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_tsend.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_tsend.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_tsend.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_tsenddata.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_tsenddata.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_tsenddata.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_tsenddata.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_tsendmsg.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_tsendmsg.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_tsendmsg.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_tsendmsg.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_tsendv.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_tsendv.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_tsendv.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_tsendv.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_tx_size_left.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_tx_size_left.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_tx_size_left.3 
rename to opal/mca/common/libfabric/libfabric/man/man3/fi_tx_size_left.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_version.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_version.3 similarity index 96% rename from opal/mca/common/libfabric/libfabric/man/fi_version.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_version.3 index be35186e38..7ae1945953 100644 --- a/opal/mca/common/libfabric/libfabric/man/fi_version.3 +++ b/opal/mca/common/libfabric/libfabric/man/man3/fi_version.3 @@ -1,4 +1,4 @@ -.TH fi_version 3 "2015\-01\-08" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc3" +.TH fi_version 3 "2015\-01\-08" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP fi_version - Version of the library interfaces diff --git a/opal/mca/common/libfabric/libfabric/man/fi_wait.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_wait.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_wait.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_wait.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_wait_open.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_wait_open.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_wait_open.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_wait_open.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_write.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_write.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_write.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_write.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_writedata.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_writedata.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_writedata.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_writedata.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_writemsg.3 
b/opal/mca/common/libfabric/libfabric/man/man3/fi_writemsg.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_writemsg.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_writemsg.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fi_writev.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_writev.3 similarity index 100% rename from opal/mca/common/libfabric/libfabric/man/fi_writev.3 rename to opal/mca/common/libfabric/libfabric/man/man3/fi_writev.3 diff --git a/opal/mca/common/libfabric/libfabric/man/fabric.7 b/opal/mca/common/libfabric/libfabric/man/man7/fabric.7 similarity index 99% rename from opal/mca/common/libfabric/libfabric/man/fabric.7 rename to opal/mca/common/libfabric/libfabric/man/man7/fabric.7 index 2b75150992..228f9521b3 100644 --- a/opal/mca/common/libfabric/libfabric/man/fabric.7 +++ b/opal/mca/common/libfabric/libfabric/man/man7/fabric.7 @@ -1,4 +1,4 @@ -.TH fabric 7 "2014\-12\-15" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc3" +.TH fabric 7 "2014\-12\-15" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP Fabric Interface Library diff --git a/opal/mca/common/libfabric/libfabric/man/fi_direct.7 b/opal/mca/common/libfabric/libfabric/man/man7/fi_direct.7 similarity index 99% rename from opal/mca/common/libfabric/libfabric/man/fi_direct.7 rename to opal/mca/common/libfabric/libfabric/man/man7/fi_direct.7 index 70eafa8514..c53ad405e0 100644 --- a/opal/mca/common/libfabric/libfabric/man/fi_direct.7 +++ b/opal/mca/common/libfabric/libfabric/man/man7/fi_direct.7 @@ -1,4 +1,4 @@ -.TH fi_direct 7 "2014\-11\-21" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc3" +.TH fi_direct 7 "2014\-11\-21" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP Direct fabric provider access diff --git a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx.h b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx.h index 59f67d85fd..6b473a299d 100644 --- 
a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx.h +++ b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx.h @@ -9,6 +9,7 @@ extern "C" { #include #endif +#include #include #include #include @@ -48,13 +49,13 @@ extern "C" { #define PSMX_TIME_OUT 120 -#define PSMX_OP_FLAGS (FI_INJECT | FI_MULTI_RECV | FI_EVENT | \ +#define PSMX_OP_FLAGS (FI_INJECT | FI_MULTI_RECV | FI_COMPLETION | \ FI_TRIGGER | FI_REMOTE_SIGNAL | FI_REMOTE_COMPLETE) #define PSMX_CAP_EXT (0) -#define PSMX_CAPS (FI_TAGGED | FI_MSG | FI_ATOMICS | FI_INJECT | \ - FI_RMA | FI_BUFFERED_RECV | FI_MULTI_RECV | \ +#define PSMX_CAPS (FI_TAGGED | FI_MSG | FI_ATOMICS | \ + FI_RMA | FI_MULTI_RECV | \ FI_READ | FI_WRITE | FI_SEND | FI_RECV | \ FI_REMOTE_READ | FI_REMOTE_WRITE | \ FI_REMOTE_COMPLETE | FI_REMOTE_SIGNAL | \ @@ -80,6 +81,8 @@ enum psmx_context_type { PSMX_SEND_CONTEXT, PSMX_RECV_CONTEXT, PSMX_MULTI_RECV_CONTEXT, + PSMX_TSEND_CONTEXT, + PSMX_TRECV_CONTEXT, PSMX_WRITE_CONTEXT, PSMX_READ_CONTEXT, PSMX_INJECT_CONTEXT, @@ -183,6 +186,7 @@ struct psmx_am_request { void *result; } atomic; }; + uint64_t cq_flags; struct fi_context fi_context; struct psmx_fid_ep *ep; int state; @@ -218,17 +222,18 @@ struct psmx_multi_recv { struct psmx_fid_fabric { struct fid_fabric fabric; + int refcnt; struct psmx_fid_domain *active_domain; + psm_uuid_t uuid; }; struct psmx_fid_domain { struct fid_domain domain; struct psmx_fid_fabric *fabric; + int refcnt; psm_ep_t psm_ep; psm_epid_t psm_epid; psm_mq_t psm_mq; - pthread_t ns_thread; - int ns_port; struct psmx_fid_ep *tagged_ep; struct psmx_fid_ep *msg_ep; struct psmx_fid_ep *rma_ep; @@ -538,6 +543,7 @@ extern struct fi_ops_rma psmx_rma_ops; extern struct fi_ops_atomic psmx_atomic_ops; extern struct psm_am_parameters psmx_am_param; extern struct psmx_env psmx_env; +extern struct psmx_fid_fabric *psmx_active_fabric; int psmx_domain_open(struct fid_fabric *fabric, struct fi_info *info, struct fid_domain **domain, void *context); diff --git 
a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_atomic.c b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_atomic.c index c3f41092bf..b52d81d69f 100644 --- a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_atomic.c +++ b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_atomic.c @@ -381,6 +381,7 @@ int psmx_am_atomic_handler(psm_am_token_t token, psm_epaddr_t epaddr, struct psmx_fid_mr *mr; struct psmx_fid_ep *target_ep; void *tmp_buf; + uint64_t cq_flags; switch (args[0].u32w0 & PSMX_AM_OP_MASK) { case PSMX_AM_REQ_ATOMIC_WRITE: @@ -404,7 +405,7 @@ int psmx_am_atomic_handler(psm_am_token_t token, psm_epaddr_t epaddr, mr->cq, 0, /* context */ addr, - 0, /* flags */ + FI_REMOTE_WRITE | FI_ATOMIC, len, 0, /* data */ 0, /* tag */ @@ -438,9 +439,12 @@ int psmx_am_atomic_handler(psm_am_token_t token, psm_epaddr_t epaddr, key = args[3].u64; datatype = args[4].u32w0; op = args[4].u32w1; + cq_flags = FI_REMOTE_WRITE | FI_ATOMIC; - if (op == FI_ATOMIC_READ) + if (op == FI_ATOMIC_READ) { len = fi_datatype_size(datatype) * count; + cq_flags = FI_REMOTE_READ | FI_ATOMIC; + } assert(len == fi_datatype_size(datatype) * count); @@ -463,7 +467,7 @@ int psmx_am_atomic_handler(psm_am_token_t token, psm_epaddr_t epaddr, mr->cq, 0, /* context */ addr, - 0, /* flags */ + cq_flags, len, 0, /* data */ 0, /* tag */ @@ -535,7 +539,7 @@ int psmx_am_atomic_handler(psm_am_token_t token, psm_epaddr_t epaddr, mr->cq, 0, /* context */ addr, - 0, /* flags */ + FI_REMOTE_WRITE | FI_ATOMIC, len, 0, /* data */ 0, /* tag */ @@ -578,7 +582,7 @@ int psmx_am_atomic_handler(psm_am_token_t token, psm_epaddr_t epaddr, req->ep->send_cq, req->atomic.context, req->atomic.buf, - 0, /* flags */ + req->cq_flags, req->atomic.len, 0, /* data */ 0, /* tag */ @@ -610,7 +614,7 @@ int psmx_am_atomic_handler(psm_am_token_t token, psm_epaddr_t epaddr, req->ep->send_cq, req->atomic.context, req->atomic.buf, - 0, /* flags */ + req->cq_flags, req->atomic.len, 0, /* data */ 0, /* tag */ @@ -653,6 
+657,7 @@ static int psmx_atomic_self(int am_cmd, int err = 0; int op_error; int access; + uint64_t cq_flags = 0; if (am_cmd == PSMX_AM_REQ_ATOMIC_WRITE) access = FI_REMOTE_WRITE; @@ -672,18 +677,24 @@ static int psmx_atomic_self(int am_cmd, case PSMX_AM_REQ_ATOMIC_WRITE: err = psmx_atomic_do_write((void *)addr, (void *)buf, (int)datatype, (int)op, (int)count); + cq_flags = FI_WRITE | FI_ATOMIC; break; case PSMX_AM_REQ_ATOMIC_READWRITE: err = psmx_atomic_do_readwrite((void *)addr, (void *)buf, (void *)result, (int)datatype, (int)op, (int)count); + if (op == FI_ATOMIC_READ) + cq_flags = FI_READ | FI_ATOMIC; + else + cq_flags = FI_WRITE | FI_ATOMIC; break; case PSMX_AM_REQ_ATOMIC_COMPWRITE: err = psmx_atomic_do_compwrite((void *)addr, (void *)buf, (void *)compare, (void *)result, (int)datatype, (int)op, (int)count); + cq_flags = FI_WRITE | FI_ATOMIC; break; } @@ -693,7 +704,7 @@ static int psmx_atomic_self(int am_cmd, mr->cq, 0, /* context */ (void *)addr, - 0, /* flags */ + FI_REMOTE_WRITE | FI_ATOMIC, len, 0, /* data */ 0, /* tag */ @@ -729,13 +740,13 @@ static int psmx_atomic_self(int am_cmd, gen_local_event: no_event = ((flags & FI_INJECT) || - (ep->send_cq_event_flag && !(flags & FI_EVENT))); + (ep->send_cq_event_flag && !(flags & FI_COMPLETION))); if (ep->send_cq && !no_event) { event = psmx_cq_create_event( ep->send_cq, context, (void *)buf, - 0, /* flags */ + cq_flags, len, 0, /* data */ 0, /* tag */ @@ -856,7 +867,7 @@ ssize_t _psmx_atomic_write(struct fid_ep *ep, if (!req) return -FI_ENOMEM; - if (ep_priv->send_cq_event_flag && !(flags & FI_EVENT)) + if (ep_priv->send_cq_event_flag && !(flags & FI_COMPLETION)) req->no_event = 1; } @@ -867,6 +878,7 @@ ssize_t _psmx_atomic_write(struct fid_ep *ep, req->atomic.key = key; req->atomic.context = context; req->ep = ep_priv; + req->cq_flags = FI_WRITE | FI_ATOMIC; args[0].u32w0 = PSMX_AM_REQ_ATOMIC_WRITE; args[0].u32w1 = count; @@ -1042,7 +1054,7 @@ ssize_t _psmx_atomic_readwrite(struct fid_ep *ep, if (!req) return 
-FI_ENOMEM; - if (ep_priv->send_cq_event_flag && !(flags & FI_EVENT)) + if (ep_priv->send_cq_event_flag && !(flags & FI_COMPLETION)) req->no_event = 1; } @@ -1054,6 +1066,10 @@ ssize_t _psmx_atomic_readwrite(struct fid_ep *ep, req->atomic.context = context; req->atomic.result = result; req->ep = ep_priv; + if (op == FI_ATOMIC_READ) + req->cq_flags = FI_READ | FI_ATOMIC; + else + req->cq_flags = FI_WRITE | FI_ATOMIC; args[0].u32w0 = PSMX_AM_REQ_ATOMIC_READWRITE; args[0].u32w1 = count; @@ -1248,7 +1264,7 @@ ssize_t _psmx_atomic_compwrite(struct fid_ep *ep, if (!req) return -FI_ENOMEM; - if (ep_priv->send_cq_event_flag && !(flags & FI_EVENT)) + if (ep_priv->send_cq_event_flag && !(flags & FI_COMPLETION)) req->no_event = 1; if (compare != buf + len) { @@ -1269,6 +1285,7 @@ ssize_t _psmx_atomic_compwrite(struct fid_ep *ep, req->atomic.context = context; req->atomic.result = result; req->ep = ep_priv; + req->cq_flags = FI_WRITE | FI_ATOMIC; args[0].u32w0 = PSMX_AM_REQ_ATOMIC_COMPWRITE; args[0].u32w1 = count; diff --git a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_av.c b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_av.c index fc0cdc945c..db98c351fd 100644 --- a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_av.c +++ b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_av.c @@ -174,12 +174,20 @@ static int psmx_av_insert(struct fid_av *av, const void *addr, size_t count, if (!mask[i]) continue; - if (errors[i] == PSM_OK) { + if (errors[i] == PSM_OK || errors[i] == PSM_EPID_ALREADY_CONNECTED) { psmx_set_epaddr_context(av_priv->domain, ((psm_epid_t *) addr)[i], ((psm_epaddr_t *) fi_addr)[i]); } else { + PSMX_DEBUG("%d: psm_ep_connect returned %s. remote epid=%lx.\n", + i, psm_error_get_string(errors[i]), + ((psm_epid_t *)addr)[i]); + if (((psm_epid_t *)addr)[i] == 0) + PSMX_DEBUG("does the application depend on the provider " + "to resolve IP address into endpoint id?
if so " "check if the name server has started correctly " "at the other side.\n"); fi_addr[i] = FI_ADDR_NOTAVAIL; error_count++; } @@ -308,8 +316,8 @@ int psmx_av_open(struct fid_domain *domain, struct fi_av_attr *attr, type = attr->type; break; default: - PSMX_DEBUG("%s: attr->type=%d, supported=%d %d\n", - __func__, attr->type, FI_AV_MAP, FI_AV_TABLE); + PSMX_DEBUG("attr->type=%d, supported=%d %d\n", + attr->type, FI_AV_MAP, FI_AV_TABLE); return -FI_EINVAL; } diff --git a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_cntr.c b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_cntr.c index f433eb663f..925946cefd 100644 --- a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_cntr.c +++ b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_cntr.c @@ -160,7 +160,7 @@ void psmx_cntr_check_trigger(struct psmx_fid_cntr *cntr) trigger->atomic_compwrite.flags); break; default: - PSMX_DEBUG("%s: %d unsupported op\n", __func__, trigger->op); + PSMX_DEBUG("%d unsupported op\n", trigger->op); break; } @@ -376,7 +376,7 @@ int psmx_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, break; default: - PSMX_DEBUG("%s: attr->events=%d, supported=%d\n", __func__, + PSMX_DEBUG("attr->events=%d, supported=%d\n", attr->events, FI_CNTR_EVENTS_COMP); return -FI_EINVAL; } @@ -388,8 +388,7 @@ int psmx_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, case FI_WAIT_SET: if (!attr->wait_set) { - PSMX_DEBUG("%s: FI_WAIT_SET is specified but attr->wait_set is NULL\n", - __func__); + PSMX_DEBUG("FI_WAIT_SET is specified but attr->wait_set is NULL\n"); return -FI_EINVAL; } wait = (struct psmx_fid_wait *)attr->wait_set; @@ -407,7 +406,7 @@ int psmx_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, break; default: - PSMX_DEBUG("%s: attr->wait_obj=%d, supported=%d...%d\n", __func__, + PSMX_DEBUG("attr->wait_obj=%d, supported=%d...%d\n", attr->wait_obj, FI_WAIT_NONE, FI_WAIT_MUTEX_COND); return -FI_EINVAL; } diff --git
a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_cq.c b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_cq.c index 71d8866a27..0ff1a84e03 100644 --- a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_cq.c +++ b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_cq.c @@ -133,26 +133,62 @@ static struct psmx_cq_event *psmx_cq_create_event_from_status( struct fi_context *fi_context = psm_status->context; void *op_context, *buf; int is_recv = 0; + uint64_t flags; switch((int)PSMX_CTXT_TYPE(fi_context)) { case PSMX_SEND_CONTEXT: op_context = fi_context; buf = PSMX_CTXT_USER(fi_context); + flags = FI_SEND | FI_MSG; break; case PSMX_RECV_CONTEXT: op_context = fi_context; buf = PSMX_CTXT_USER(fi_context); + flags = FI_RECV | FI_MSG; is_recv = 1; break; case PSMX_MULTI_RECV_CONTEXT: op_context = fi_context; req = PSMX_CTXT_USER(fi_context); buf = req->buf + req->offset; + flags = FI_RECV | FI_MSG; is_recv = 1; break; + case PSMX_TSEND_CONTEXT: + op_context = fi_context; + buf = PSMX_CTXT_USER(fi_context); + flags = FI_SEND | FI_TAGGED; + break; + case PSMX_TRECV_CONTEXT: + op_context = fi_context; + buf = PSMX_CTXT_USER(fi_context); + flags = FI_RECV | FI_TAGGED; + is_recv = 1; + break; + case PSMX_READ_CONTEXT: + op_context = PSMX_CTXT_USER(fi_context); + buf = NULL; + flags = FI_READ | FI_RMA; + break; + case PSMX_WRITE_CONTEXT: + op_context = PSMX_CTXT_USER(fi_context); + buf = NULL; + flags = FI_WRITE | FI_RMA; + break; + case PSMX_REMOTE_READ_CONTEXT: + op_context = PSMX_CTXT_USER(fi_context); + buf = NULL; + flags = FI_REMOTE_READ | FI_RMA; + break; + case PSMX_REMOTE_WRITE_CONTEXT: + op_context = PSMX_CTXT_USER(fi_context); + buf = NULL; + flags = FI_REMOTE_WRITE | FI_RMA; + break; default: op_context = PSMX_CTXT_USER(fi_context); buf = NULL; + flags = 0; break; } @@ -180,6 +216,7 @@ static struct psmx_cq_event *psmx_cq_create_event_from_status( if (psm_status->error_code) { event->cqe.err.op_context = op_context; + event->cqe.err.flags = flags; 
event->cqe.err.err = -psmx_errno(psm_status->error_code); event->cqe.err.prov_errno = psm_status->error_code; event->cqe.err.tag = psm_status->msg_tag; @@ -196,14 +233,14 @@ static struct psmx_cq_event *psmx_cq_create_event_from_status( case FI_CQ_FORMAT_MSG: event->cqe.msg.op_context = op_context; - event->cqe.msg.flags = 0; + event->cqe.msg.flags = flags; event->cqe.msg.len = psm_status->nbytes; break; case FI_CQ_FORMAT_DATA: event->cqe.data.op_context = op_context; event->cqe.data.buf = buf; - event->cqe.data.flags = 0; + event->cqe.data.flags = flags; event->cqe.data.len = psm_status->nbytes; if (data) event->cqe.data.data = data; @@ -212,7 +249,7 @@ static struct psmx_cq_event *psmx_cq_create_event_from_status( case FI_CQ_FORMAT_TAGGED: event->cqe.tagged.op_context = op_context; event->cqe.tagged.buf = buf; - event->cqe.tagged.flags = 0; + event->cqe.tagged.flags = flags; event->cqe.tagged.len = psm_status->nbytes; event->cqe.tagged.tag = psm_status->msg_tag; if (data) @@ -323,11 +360,13 @@ int psmx_cq_poll_mq(struct psmx_fid_cq *cq, struct psmx_fid_domain *domain, break; case PSMX_SEND_CONTEXT: + case PSMX_TSEND_CONTEXT: tmp_cq = tmp_ep->send_cq; tmp_cntr = tmp_ep->send_cntr; break; case PSMX_RECV_CONTEXT: + case PSMX_TRECV_CONTEXT: tmp_cq = tmp_ep->recv_cq; tmp_cntr = tmp_ep->recv_cntr; break; @@ -738,7 +777,7 @@ int psmx_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, break; default: - PSMX_DEBUG("%s: attr->format=%d, supported=%d...%d\n", __func__, attr->format, + PSMX_DEBUG("attr->format=%d, supported=%d...%d\n", attr->format, FI_CQ_FORMAT_UNSPEC, FI_CQ_FORMAT_TAGGED); return -FI_EINVAL; } @@ -750,8 +789,7 @@ int psmx_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, case FI_WAIT_SET: if (!attr->wait_set) { - PSMX_DEBUG("%s: FI_WAIT_SET is specified but attr->wait_set is NULL\n", - __func__); + PSMX_DEBUG("FI_WAIT_SET is specified but attr->wait_set is NULL\n"); return -FI_EINVAL; } wait = (struct psmx_fid_wait *)attr->wait_set; @@ 
-769,7 +807,7 @@ int psmx_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, break; default: - PSMX_DEBUG("%s: attr->wait_obj=%d, supported=%d...%d\n", __func__, attr->wait_obj, + PSMX_DEBUG("attr->wait_obj=%d, supported=%d...%d\n", attr->wait_obj, FI_WAIT_NONE, FI_WAIT_MUTEX_COND); return -FI_EINVAL; } @@ -781,7 +819,7 @@ int psmx_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, break; default: - PSMX_DEBUG("%s: attr->wait_cond=%d, supported=%d...%d\n", __func__, + PSMX_DEBUG("attr->wait_cond=%d, supported=%d...%d\n", attr->wait_cond, FI_CQ_COND_NONE, FI_CQ_COND_THRESHOLD); return -FI_EINVAL; } diff --git a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_domain.c b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_domain.c index 394c4a4871..b92a002000 100644 --- a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_domain.c +++ b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_domain.c @@ -37,14 +37,14 @@ static int psmx_domain_close(fid_t fid) struct psmx_fid_domain *domain; int err; + PSMX_DEBUG("\n"); + domain = container_of(fid, struct psmx_fid_domain, domain.fid); - psmx_am_fini(domain); + if (--domain->refcnt > 0) + return 0; - if (domain->ns_thread) { - pthread_cancel(domain->ns_thread); - pthread_join(domain->ns_thread, NULL); - } + psmx_am_fini(domain); #if 0 /* AM messages could arrive after MQ is finalized, causing segfault @@ -94,22 +94,20 @@ int psmx_domain_open(struct fid_fabric *fabric, struct fi_info *info, struct psmx_fid_fabric *fabric_priv; struct psmx_fid_domain *domain_priv; struct psm_ep_open_opts opts; - psm_uuid_t uuid; int err = -FI_ENOMEM; - PSMX_DEBUG("%s\n", __func__); + PSMX_DEBUG("\n"); fabric_priv = container_of(fabric, struct psmx_fid_fabric, fabric); if (fabric_priv->active_domain) { - PSMX_DEBUG("%s: a domain has been opened for the fabric\n"); - return -FI_EBUSY; + fabric_priv->active_domain->refcnt++; + *domain = &fabric_priv->active_domain->domain; + return 0; } if (!info->domain_attr->name 
|| strncmp(info->domain_attr->name, "psm", 3)) return -FI_EINVAL; - psmx_query_mpi(); - domain_priv = (struct psmx_fid_domain *) calloc(1, sizeof *domain_priv); if (!domain_priv) goto err_out; @@ -124,8 +122,7 @@ int psmx_domain_open(struct fid_fabric *fabric, struct fi_info *info, psm_ep_open_opts_get_defaults(&opts); - psmx_get_uuid(uuid); - err = psm_ep_open(uuid, &opts, + err = psm_ep_open(fabric_priv->uuid, &opts, &domain_priv->psm_ep, &domain_priv->psm_epid); if (err != PSM_OK) { PSMX_WARN("%s: psm_ep_open returns %d, errno=%d\n", @@ -143,25 +140,12 @@ int psmx_domain_open(struct fid_fabric *fabric, struct fi_info *info, goto err_out_close_ep; } - domain_priv->ns_port = psmx_uuid_to_port(uuid); - - if (psmx_env.name_server) - err = pthread_create(&domain_priv->ns_thread, NULL, psmx_name_server, (void *)domain_priv); - else - err = -1; - - if (err) - domain_priv->ns_thread = 0; - if (psmx_domain_enable_ep(domain_priv, NULL) < 0) { - if (domain_priv->ns_thread) { - pthread_cancel(domain_priv->ns_thread); - pthread_join(domain_priv->ns_thread, NULL); - } psm_mq_finalize(domain_priv->psm_mq); goto err_out_close_ep; } + domain_priv->refcnt = 1; fabric_priv->active_domain = domain_priv; *domain = &domain_priv->domain; return 0; diff --git a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_ep.c b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_ep.c index e9e2f7b5dc..32bb0e208e 100644 --- a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_ep.c +++ b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_ep.c @@ -37,35 +37,35 @@ static void psmx_ep_optimize_ops(struct psmx_fid_ep *ep) if (ep->ep.tagged) { if (ep->flags) { ep->ep.tagged = &psmx_tagged_ops; - PSMX_DEBUG("%s: generic tagged ops.\n", __func__); + PSMX_DEBUG("generic tagged ops.\n"); } else if (ep->send_cq_event_flag && ep->recv_cq_event_flag) { if (ep->av && ep->av->type == FI_AV_TABLE) ep->ep.tagged = &psmx_tagged_ops_no_event_av_table; else ep->ep.tagged = 
&psmx_tagged_ops_no_event_av_map; - PSMX_DEBUG("%s: tagged ops optimized for op_flags=0 and event suppression\n", __func__); + PSMX_DEBUG("tagged ops optimized for op_flags=0 and event suppression\n"); } else if (ep->send_cq_event_flag) { if (ep->av && ep->av->type == FI_AV_TABLE) ep->ep.tagged = &psmx_tagged_ops_no_send_event_av_table; else ep->ep.tagged = &psmx_tagged_ops_no_send_event_av_map; - PSMX_DEBUG("%s: tagged ops optimized for op_flags=0 and send event suppression\n", __func__); + PSMX_DEBUG("tagged ops optimized for op_flags=0 and send event suppression\n"); } else if (ep->recv_cq_event_flag) { if (ep->av && ep->av->type == FI_AV_TABLE) ep->ep.tagged = &psmx_tagged_ops_no_recv_event_av_table; else ep->ep.tagged = &psmx_tagged_ops_no_recv_event_av_map; - PSMX_DEBUG("%s: tagged ops optimized for op_flags=0 and recv event suppression\n", __func__); + PSMX_DEBUG("tagged ops optimized for op_flags=0 and recv event suppression\n"); } else { if (ep->av && ep->av->type == FI_AV_TABLE) ep->ep.tagged = &psmx_tagged_ops_no_flag_av_table; else ep->ep.tagged = &psmx_tagged_ops_no_flag_av_map; - PSMX_DEBUG("%s: tagged ops optimized for op_flags=0\n", __func__); + PSMX_DEBUG("tagged ops optimized for op_flags=0\n"); } } } @@ -172,12 +172,12 @@ static int psmx_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags) return -FI_EINVAL; if (flags & FI_SEND) { ep->send_cq = cq; - if (flags & FI_EVENT) + if (flags & FI_COMPLETION) ep->send_cq_event_flag = 1; } if (flags & FI_RECV) { ep->recv_cq = cq; - if (flags & FI_EVENT) + if (flags & FI_COMPLETION) ep->recv_cq_event_flag = 1; } psmx_ep_optimize_ops(ep); @@ -389,7 +389,7 @@ int psmx_stx_ctx(struct fid_domain *domain, struct fi_tx_attr *attr, { struct psmx_fid_stx *stx_priv; - PSMX_DEBUG("%s\n", __func__); + PSMX_DEBUG("\n"); stx_priv = (struct psmx_fid_stx *) calloc(1, sizeof *stx_priv); if (!stx_priv) diff --git a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_init.c 
b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_init.c index 06eebf222b..51e173358f 100644 --- a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_init.c +++ b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_init.c @@ -35,7 +35,8 @@ #include "prov.h" struct psmx_env psmx_env; -volatile int init_count = 0; +volatile int psmx_init_count = 0; +struct psmx_fid_fabric *psmx_active_fabric = NULL; static int psmx_reserve_tag_bits(int *caps, uint64_t *max_tag_value) { @@ -50,21 +51,19 @@ static int psmx_reserve_tag_bits(int *caps, uint64_t *max_tag_value) reserved_bits |= PSMX_MSG_BIT; } else if (ask_caps) { - PSMX_DEBUG("%s: unable to reserve tag bit for FI_MSG support.\n" + PSMX_DEBUG("unable to reserve tag bit for FI_MSG support.\n" "ADVICE: please reduce the asked max_tag_value, " "or remove FI_MSG from the asked capabilities, " "or set OFI_PSM_AM_MSG=1 to use an alternative (but less " - "optimized) message queue implementation.\n", - __func__); + "optimized) message queue implementation.\n"); return -1; } else { - PSMX_DEBUG("%s: unable to reserve tag bit for FI_MSG support. " + PSMX_DEBUG("unable to reserve tag bit for FI_MSG support. 
" "FI_MSG is removed from the capabilities.\n" "ADVICE: please reduce the asked max_tag_value, " "or set OFI_PSM_AM_MSG=1 to use an alternative (but less " - "optimized) message queue implementation.\n", - __func__); + "optimized) message queue implementation.\n"); ret_caps &= ~FI_MSG; } } @@ -74,19 +73,17 @@ static int psmx_reserve_tag_bits(int *caps, uint64_t *max_tag_value) reserved_bits |= PSMX_RMA_BIT; } else if (ask_caps) { - PSMX_DEBUG("%s: unable to reserve tag bit for tagged RMA acceleration.\n" + PSMX_DEBUG("unable to reserve tag bit for tagged RMA acceleration.\n" "ADVICE: please reduce the asked max_tag_value, " "or remove FI_RMA from the asked capabilities, " - "or set OFI_PSM_TAGGED_RMA=0 to disable RMA acceleration.\n", - __func__); + "or set OFI_PSM_TAGGED_RMA=0 to disable RMA acceleration.\n"); return -1; } else { - PSMX_DEBUG("%s: unable to reserve tag bit for tagged RMA acceleration. " + PSMX_DEBUG("unable to reserve tag bit for tagged RMA acceleration. " "FI_RMA is removed from the capabilities.\n" "ADVICE: please reduce the asked max_tag_value, " - "or set OFI_PSM_TAGGED_RMA=0 to disable RMA acceleration.\n", - __func__); + "or set OFI_PSM_TAGGED_RMA=0 to disable RMA acceleration.\n"); ret_caps &= ~FI_RMA; } } @@ -109,12 +106,12 @@ static int psmx_getinfo(uint32_t version, const char *node, const char *service, uint64_t max_tag_value = 0; int err = -FI_ENODATA; - PSMX_DEBUG("%s\n", __func__); + PSMX_DEBUG("\n"); *info = NULL; if (psm_ep_num_devunits(&cnt) || !cnt) { - PSMX_DEBUG("%s: no PSM device is found.\n", __func__); + PSMX_DEBUG("no PSM device is found.\n"); return -FI_ENODATA; } @@ -122,100 +119,103 @@ static int psmx_getinfo(uint32_t version, const char *node, const char *service, dest_addr = psmx_resolve_name(node, 0); if (hints) { - switch (hints->ep_type) { - case FI_EP_UNSPEC: - case FI_EP_RDM: - break; - default: - PSMX_DEBUG("%s: hints->ep_type=%d, supported=%d,%d.\n", - __func__, hints->ep_type, FI_EP_UNSPEC, - FI_EP_RDM); - goto 
err_out; - } - if (hints->ep_attr) { + switch (hints->ep_attr->type) { + case FI_EP_UNSPEC: + case FI_EP_RDM: + break; + default: + PSMX_DEBUG("hints->ep_attr->type=%d, supported=%d,%d.\n", + hints->ep_attr->type, FI_EP_UNSPEC, + FI_EP_RDM); + goto err_out; + } + switch (hints->ep_attr->protocol) { case FI_PROTO_UNSPEC: case FI_PROTO_PSMX: break; default: - PSMX_DEBUG("%s: hints->protocol=%d, supported=%d %d\n", - __func__, hints->ep_attr->protocol, + PSMX_DEBUG("hints->protocol=%d, supported=%d %d\n", + hints->ep_attr->protocol, FI_PROTO_UNSPEC, FI_PROTO_PSMX); goto err_out; } if (hints->ep_attr->tx_ctx_cnt > 1) { - PSMX_DEBUG("%s: hints->ep_attr->tx_ctx_cnt=%d, supported=0,1\n", - __func__, hints->ep_attr->tx_ctx_cnt); + PSMX_DEBUG("hints->ep_attr->tx_ctx_cnt=%d, supported=0,1\n", + hints->ep_attr->tx_ctx_cnt); goto err_out; } if (hints->ep_attr->rx_ctx_cnt > 1) { - PSMX_DEBUG("%s: hints->ep_attr->rx_ctx_cnt=%d, supported=0,1\n", - __func__, hints->ep_attr->rx_ctx_cnt); + PSMX_DEBUG("hints->ep_attr->rx_ctx_cnt=%d, supported=0,1\n", + hints->ep_attr->rx_ctx_cnt); goto err_out; } } if ((hints->caps & PSMX_CAPS) != hints->caps && (hints->caps & PSMX_CAPS2) != hints->caps) { - PSMX_DEBUG("%s: hints->caps=0x%llx, supported=0x%llx,0x%llx\n", - __func__, hints->caps, PSMX_CAPS, PSMX_CAPS2); + PSMX_DEBUG("hints->caps=0x%llx, supported=0x%llx,0x%llx\n", + hints->caps, PSMX_CAPS, PSMX_CAPS2); goto err_out; } - if (hints->tx_attr && - (hints->tx_attr->op_flags & PSMX_OP_FLAGS) != - hints->tx_attr->op_flags) { - PSMX_DEBUG("%s: hints->tx->flags=0x%llx, supported=0x%llx\n", - __func__, hints->tx_attr->op_flags, PSMX_OP_FLAGS); - goto err_out; + if (hints->tx_attr) { + if ((hints->tx_attr->op_flags & PSMX_OP_FLAGS) != + hints->tx_attr->op_flags) { + PSMX_DEBUG("hints->tx->flags=0x%llx, " + "supported=0x%llx\n", + hints->tx_attr->op_flags, + PSMX_OP_FLAGS); + goto err_out; + } + if (hints->tx_attr->inject_size > PSMX_INJECT_SIZE) { + 
PSMX_DEBUG("hints->tx_attr->inject_size=%ld," + "supported=%ld.\n", + hints->tx_attr->inject_size, + PSMX_INJECT_SIZE); + goto err_out; + } } if (hints->rx_attr && (hints->rx_attr->op_flags & PSMX_OP_FLAGS) != hints->rx_attr->op_flags) { - PSMX_DEBUG("%s: hints->rx->flags=0x%llx, supported=0x%llx\n", - __func__, hints->rx_attr->op_flags, PSMX_OP_FLAGS); + PSMX_DEBUG("hints->rx->flags=0x%llx, supported=0x%llx\n", + hints->rx_attr->op_flags, PSMX_OP_FLAGS); goto err_out; } if ((hints->mode & PSMX_MODE) != PSMX_MODE) { - PSMX_DEBUG("%s: hints->mode=0x%llx, required=0x%llx\n", - __func__, hints->mode, PSMX_MODE); + PSMX_DEBUG("hints->mode=0x%llx, required=0x%llx\n", + hints->mode, PSMX_MODE); goto err_out; } if (hints->fabric_attr && hints->fabric_attr->name && strncmp(hints->fabric_attr->name, "psm", 3)) { - PSMX_DEBUG("%s: hints->fabric_name=%s, supported=psm\n", - __func__, hints->fabric_attr->name); + PSMX_DEBUG("hints->fabric_name=%s, supported=psm\n", + hints->fabric_attr->name); goto err_out; } if (hints->domain_attr && hints->domain_attr->name && strncmp(hints->domain_attr->name, "psm", 3)) { - PSMX_DEBUG("%s: hints->domain_name=%s, supported=psm\n", - __func__, hints->domain_attr->name); + PSMX_DEBUG("hints->domain_name=%s, supported=psm\n", + hints->domain_attr->name); goto err_out; } if (hints->ep_attr) { if (hints->ep_attr->max_msg_size > PSMX_MAX_MSG_SIZE) { - PSMX_DEBUG("%s: hints->ep_attr->max_msg_size=%ld," - "supported=%ld.\n", __func__, + PSMX_DEBUG("hints->ep_attr->max_msg_size=%ld," + "supported=%ld.\n", hints->ep_attr->max_msg_size, PSMX_MAX_MSG_SIZE); goto err_out; } - if (hints->ep_attr->inject_size > PSMX_INJECT_SIZE) { - PSMX_DEBUG("%s: hints->ep_attr->inject_size=%ld," - "supported=%ld.\n", __func__, - hints->ep_attr->inject_size, - PSMX_INJECT_SIZE); - goto err_out; - } max_tag_value = fi_tag_bits(hints->ep_attr->mem_tag_format); } @@ -227,16 +227,15 @@ static int psmx_getinfo(uint32_t version, const char *node, const char *service, if 
(psmx_reserve_tag_bits(&caps, &max_tag_value) < 0) goto err_out; - psmx_info = fi_allocinfo_internal(); + psmx_info = fi_allocinfo(); if (!psmx_info) { err = -FI_ENOMEM; goto err_out; } + psmx_info->ep_attr->type = ep_type; psmx_info->ep_attr->protocol = FI_PROTO_PSMX; psmx_info->ep_attr->max_msg_size = PSMX_MAX_MSG_SIZE; - psmx_info->ep_attr->inject_size = PSMX_INJECT_SIZE; - psmx_info->ep_attr->total_buffered_recv = ~(0ULL); /* that's how PSM handles it internally! */ psmx_info->ep_attr->mem_tag_format = fi_tag_format(max_tag_value); psmx_info->ep_attr->msg_order = FI_ORDER_SAS; psmx_info->ep_attr->comp_order = FI_ORDER_NONE; @@ -249,7 +248,6 @@ static int psmx_getinfo(uint32_t version, const char *node, const char *service, psmx_info->domain_attr->name = strdup("psm"); psmx_info->next = NULL; - psmx_info->ep_type = ep_type; psmx_info->caps = (hints && hints->caps) ? hints->caps : caps; psmx_info->mode = PSMX_MODE; psmx_info->addr_format = FI_ADDR_PSMX; @@ -266,7 +264,7 @@ static int psmx_getinfo(uint32_t version, const char *node, const char *service, ? hints->tx_attr->op_flags : 0; psmx_info->tx_attr->msg_order = psmx_info->ep_attr->msg_order; psmx_info->tx_attr->comp_order = psmx_info->ep_attr->comp_order; - psmx_info->tx_attr->inject_size = psmx_info->ep_attr->inject_size; + psmx_info->tx_attr->inject_size = PSMX_INJECT_SIZE; psmx_info->tx_attr->size = UINT64_MAX; psmx_info->tx_attr->iov_limit = 1; @@ -276,7 +274,7 @@ static int psmx_getinfo(uint32_t version, const char *node, const char *service, ? hints->tx_attr->op_flags : 0; psmx_info->rx_attr->msg_order = psmx_info->ep_attr->msg_order; psmx_info->rx_attr->comp_order = psmx_info->ep_attr->comp_order; - psmx_info->rx_attr->total_buffered_recv = psmx_info->ep_attr->total_buffered_recv; + psmx_info->rx_attr->total_buffered_recv = ~(0ULL); /* that's how PSM handles it internally! 
*/ psmx_info->rx_attr->size = UINT64_MAX; psmx_info->rx_attr->iov_limit = 1; @@ -289,7 +287,19 @@ err_out: static int psmx_fabric_close(fid_t fid) { - free(fid); + struct psmx_fid_fabric *fabric; + + PSMX_DEBUG("\n"); + + fabric = container_of(fid, struct psmx_fid_fabric, fabric.fid); + if (--fabric->refcnt) { + if (fabric->active_domain) + fi_close(&fabric->active_domain->domain.fid); + assert(fabric == psmx_active_fabric); + psmx_active_fabric = NULL; + free(fid); + } + return 0; } @@ -308,12 +318,20 @@ static int psmx_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric, void *context) { struct psmx_fid_fabric *fabric_priv; + pthread_t thread; + pthread_attr_t thread_attr; - PSMX_DEBUG("%s\n", __func__); + PSMX_DEBUG("\n"); if (strncmp(attr->name, "psm", 3)) return -FI_ENODATA; + if (psmx_active_fabric) { + psmx_active_fabric->refcnt++; + *fabric = &psmx_active_fabric->fabric; + return 0; + } + fabric_priv = calloc(1, sizeof(*fabric_priv)); if (!fabric_priv) return -FI_ENOMEM; @@ -322,15 +340,27 @@ static int psmx_fabric(struct fi_fabric_attr *attr, fabric_priv->fabric.fid.context = context; fabric_priv->fabric.fid.ops = &psmx_fabric_fi_ops; fabric_priv->fabric.ops = &psmx_fabric_ops; + + psmx_get_uuid(fabric_priv->uuid); + + if (psmx_env.name_server) { + pthread_attr_init(&thread_attr); + pthread_attr_setdetachstate(&thread_attr,PTHREAD_CREATE_DETACHED); + pthread_create(&thread, &thread_attr, psmx_name_server, (void *)fabric_priv); + } + + psmx_query_mpi(); + + fabric_priv->refcnt = 1; *fabric = &fabric_priv->fabric; return 0; } static void psmx_fini(void) { - PSMX_DEBUG("%s\n", __func__); + PSMX_DEBUG("\n"); - if (! --init_count) + if (! 
--psmx_init_count) psm_finalize(); } @@ -378,7 +408,7 @@ PSM_INI if (!psmx_env.uuid) psmx_env.uuid = PSMX_DEFAULT_UUID; - PSMX_DEBUG("%s\n", __func__); + PSMX_DEBUG("\n"); psm_error_register_handler(NULL, PSM_ERRHANDLER_NO_HANDLER); @@ -392,8 +422,8 @@ PSM_INI return NULL; } - PSMX_DEBUG("%s: PSM header version = (%d, %d)\n", __func__, PSM_VERNO_MAJOR, PSM_VERNO_MINOR); - PSMX_DEBUG("%s: PSM library version = (%d, %d)\n", __func__, major, minor); + PSMX_DEBUG("PSM header version = (%d, %d)\n", PSM_VERNO_MAJOR, PSM_VERNO_MINOR); + PSMX_DEBUG("PSM library version = (%d, %d)\n", major, minor); check_version = psmx_get_int_env("OFI_PSM_VERSION_CHECK", 1); @@ -404,13 +434,13 @@ PSM_INI return NULL; } - PSMX_DEBUG("%s: OFI_PSM_NAME_SERVER = %d\n", __func__, psmx_env.name_server); - PSMX_DEBUG("%s: OFI_PSM_AM_MSG = %d\n", __func__, psmx_env.am_msg); - PSMX_DEBUG("%s: OFI_PSM_TAGGED_RMA = %d\n", __func__, psmx_env.tagged_rma); - PSMX_DEBUG("%s: OFI_PSM_WARNING = %d\n", __func__, psmx_env.warning); - PSMX_DEBUG("%s: OFI_PSM_UUID = %s\n", __func__, psmx_env.uuid); + PSMX_DEBUG("OFI_PSM_NAME_SERVER = %d\n", psmx_env.name_server); + PSMX_DEBUG("OFI_PSM_AM_MSG = %d\n", psmx_env.am_msg); + PSMX_DEBUG("OFI_PSM_TAGGED_RMA = %d\n", psmx_env.tagged_rma); + PSMX_DEBUG("OFI_PSM_WARNING = %d\n", psmx_env.warning); + PSMX_DEBUG("OFI_PSM_UUID = %s\n", psmx_env.uuid); - init_count++; + psmx_init_count++; return (&psmx_prov); } diff --git a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_mr.c b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_mr.c index 0660f03cbf..711abc6e66 100644 --- a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_mr.c +++ b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_mr.c @@ -261,14 +261,20 @@ static void psmx_mr_normalize_iov(struct iovec *iov, size_t *count) *count = i; } -static int psmx_mr_reg(struct fid_domain *domain, const void *buf, size_t len, +static int psmx_mr_reg(struct fid *fid, const void *buf, size_t len, uint64_t access, 
uint64_t offset, uint64_t requested_key, uint64_t flags, struct fid_mr **mr, void *context) { + struct fid_domain *domain; struct psmx_fid_domain *domain_priv; struct psmx_fid_mr *mr_priv; uint64_t key; + if (fid->fclass != FI_CLASS_DOMAIN) { + return -FI_EINVAL; + } + domain = container_of(fid, struct fid_domain, fid); + domain_priv = container_of(domain, struct psmx_fid_domain, domain); if (flags & FI_MR_KEY) { if (domain_priv->mode & FI_PROV_MR_ATTR) @@ -311,16 +317,22 @@ static int psmx_mr_reg(struct fid_domain *domain, const void *buf, size_t len, return 0; } -static int psmx_mr_regv(struct fid_domain *domain, +static int psmx_mr_regv(struct fid *fid, const struct iovec *iov, size_t count, uint64_t access, uint64_t offset, uint64_t requested_key, uint64_t flags, struct fid_mr **mr, void *context) { + struct fid_domain *domain; struct psmx_fid_domain *domain_priv; struct psmx_fid_mr *mr_priv; int i; uint64_t key; + if (fid->fclass != FI_CLASS_DOMAIN) { + return -FI_EINVAL; + } + domain = container_of(fid, struct fid_domain, fid); + domain_priv = container_of(domain, struct psmx_fid_domain, domain); if (flags & FI_MR_KEY) { if (domain_priv->mode & FI_PROV_MR_ATTR) @@ -369,14 +381,20 @@ static int psmx_mr_regv(struct fid_domain *domain, return 0; } -static int psmx_mr_regattr(struct fid_domain *domain, const struct fi_mr_attr *attr, +static int psmx_mr_regattr(struct fid *fid, const struct fi_mr_attr *attr, uint64_t flags, struct fid_mr **mr) { + struct fid_domain *domain; struct psmx_fid_domain *domain_priv; struct psmx_fid_mr *mr_priv; int i; uint64_t key; + if (fid->fclass != FI_CLASS_DOMAIN) { + return -FI_EINVAL; + } + domain = container_of(fid, struct fid_domain, fid); + domain_priv = container_of(domain, struct psmx_fid_domain, domain); if (flags & FI_MR_KEY) { if (domain_priv->mode & FI_PROV_MR_ATTR) diff --git a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_msg.c b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_msg.c index 
f230ede3ac..772e6627a1 100644 --- a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_msg.c +++ b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_msg.c @@ -91,7 +91,7 @@ ssize_t _psmx_recv(struct fid_ep *ep, void *buf, size_t len, psm_tagsel = PSMX_MSG_BIT; } - if (ep_priv->recv_cq_event_flag && !(flags & FI_EVENT) && !context) { + if (ep_priv->recv_cq_event_flag && !(flags & FI_COMPLETION) && !context) { fi_context = &ep_priv->nocomp_recv_context; } else { @@ -256,7 +256,7 @@ ssize_t _psmx_send(struct fid_ep *ep, const void *buf, size_t len, PSMX_CTXT_TYPE(fi_context) = PSMX_INJECT_CONTEXT; PSMX_CTXT_EP(fi_context) = ep_priv; } - else if (ep_priv->send_cq_event_flag && !(flags & FI_EVENT) && !context) { + else if (ep_priv->send_cq_event_flag && !(flags & FI_COMPLETION) && !context) { fi_context = &ep_priv->nocomp_send_context; } else { diff --git a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_msg2.c b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_msg2.c index cd01a9f0d6..e33bf13141 100644 --- a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_msg2.c +++ b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_msg2.c @@ -221,7 +221,7 @@ int psmx_am_msg_handler(psm_am_token_t token, psm_epaddr_t epaddr, req->ep->recv_cq, req->recv.context, req->recv.buf, - 0, /* flags */ + req->cq_flags, req->recv.len_received, 0, /* data */ 0, /* tag */ @@ -275,7 +275,7 @@ int psmx_am_msg_handler(psm_am_token_t token, psm_epaddr_t epaddr, req->ep->send_cq, req->send.context, req->send.buf, - 0, /* flags */ + req->cq_flags, req->send.len, 0, /* data */ 0, /* tag */ @@ -391,8 +391,9 @@ static ssize_t _psmx_recv2(struct fid_ep *ep, void *buf, size_t len, req->recv.context = context; req->recv.src_addr = (void *)src_addr; req->ep = ep_priv; + req->cq_flags = FI_RECV | FI_MSG; - if (ep_priv->recv_cq_event_flag && !(flags & FI_EVENT)) + if (ep_priv->recv_cq_event_flag && !(flags & FI_COMPLETION)) req->no_event = 1; unexp = 
psmx_am_search_and_dequeue_unexp(ep_priv->domain, @@ -429,7 +430,7 @@ static ssize_t _psmx_recv2(struct fid_ep *ep, void *buf, size_t len, req->ep->recv_cq, req->recv.context, req->recv.buf, - 0, /* flags */ + req->cq_flags, req->recv.len_received, 0, /* data */ 0, /* tag */ @@ -547,8 +548,9 @@ static ssize_t _psmx_send2(struct fid_ep *ep, const void *buf, size_t len, req->send.len_sent = msg_size; req->send.dest_addr = (void *)dest_addr; req->ep = ep_priv; + req->cq_flags = FI_SEND | FI_MSG; - if ((ep_priv->send_cq_event_flag && !(flags & FI_EVENT)) || + if ((ep_priv->send_cq_event_flag && !(flags & FI_COMPLETION)) || (context == &ep_priv->sendimm_context)) req->no_event = 1; diff --git a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_rma.c b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_rma.c index b9ad7fd558..6afce2a658 100644 --- a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_rma.c +++ b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_rma.c @@ -110,7 +110,7 @@ int psmx_am_rma_handler(psm_am_token_t token, psm_epaddr_t epaddr, mr->cq, 0, /* context */ rma_addr, - 0, /* flags */ + FI_REMOTE_WRITE | FI_RMA | (has_data ? FI_REMOTE_CQ_DATA : 0), rma_len, has_data ? 
args[4].u64 : 0, 0, /* tag */ @@ -256,7 +256,7 @@ int psmx_am_rma_handler(psm_am_token_t token, psm_epaddr_t epaddr, req->ep->send_cq, req->write.context, req->write.buf, - 0, /* flags */ + req->cq_flags, req->write.len, 0, /* data */ 0, /* tag */ @@ -292,7 +292,7 @@ int psmx_am_rma_handler(psm_am_token_t token, psm_epaddr_t epaddr, req->ep->send_cq, req->read.context, req->read.buf, - 0, /* flags */ + req->cq_flags, req->read.len_read, 0, /* data */ 0, /* tag */ @@ -331,13 +331,16 @@ static ssize_t psmx_rma_self(int am_cmd, int op_error = 0; int access; void *dst, *src; + uint64_t cq_flags; switch (am_cmd) { case PSMX_AM_REQ_WRITE: access = FI_REMOTE_WRITE; + cq_flags = FI_WRITE | FI_RMA; break; case PSMX_AM_REQ_READ: access = FI_REMOTE_READ; + cq_flags = FI_READ | FI_RMA; break; default: return -FI_EINVAL; @@ -366,7 +369,7 @@ static ssize_t psmx_rma_self(int am_cmd, mr->cq, 0, /* context */ (void *)addr, - 0, /* flags */ + FI_REMOTE_WRITE | FI_RMA | (flags & FI_REMOTE_CQ_DATA), len, flags & FI_REMOTE_CQ_DATA ? data : 0, 0, /* tag */ @@ -385,14 +388,14 @@ static ssize_t psmx_rma_self(int am_cmd, } no_event = (flags & FI_INJECT) || - (ep->send_cq_event_flag && !(flags & FI_EVENT)); + (ep->send_cq_event_flag && !(flags & FI_COMPLETION)); if (ep->send_cq && !no_event) { event = psmx_cq_create_event( ep->send_cq, context, (void *)buf, - 0, /* flags */ + cq_flags, len, 0, /* data */ 0, /* tag */ @@ -514,11 +517,12 @@ ssize_t _psmx_read(struct fid_ep *ep, void *buf, size_t len, req->read.key = key; /* needed? 
*/ req->read.context = context; req->ep = ep_priv; + req->cq_flags = FI_READ | FI_RMA; PSMX_CTXT_TYPE(&req->fi_context) = PSMX_READ_CONTEXT; PSMX_CTXT_USER(&req->fi_context) = context; PSMX_CTXT_EP(&req->fi_context) = ep_priv; - if (ep_priv->send_cq_event_flag && !(flags & FI_EVENT)) { + if (ep_priv->send_cq_event_flag && !(flags & FI_COMPLETION)) { PSMX_CTXT_TYPE(&req->fi_context) = PSMX_NOCOMP_READ_CONTEXT; req->no_event = 1; } @@ -689,7 +693,7 @@ ssize_t _psmx_write(struct fid_ep *ep, const void *buf, size_t len, if (!req) return -FI_ENOMEM; - if (ep_priv->send_cq_event_flag && !(flags & FI_EVENT)) { + if (ep_priv->send_cq_event_flag && !(flags & FI_COMPLETION)) { PSMX_CTXT_TYPE(&req->fi_context) = PSMX_NOCOMP_WRITE_CONTEXT; req->no_event = 1; } @@ -705,6 +709,7 @@ ssize_t _psmx_write(struct fid_ep *ep, const void *buf, size_t len, req->write.key = key; /* needed? */ req->write.context = context; req->ep = ep_priv; + req->cq_flags = FI_WRITE | FI_RMA; PSMX_CTXT_USER(&req->fi_context) = context; PSMX_CTXT_EP(&req->fi_context) = ep_priv; diff --git a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_tagged.c b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_tagged.c index e03e6bdcfa..e771ebc0c6 100644 --- a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_tagged.c +++ b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_tagged.c @@ -81,7 +81,7 @@ ssize_t _psmx_tagged_recv(struct fid_ep *ep, void *buf, size_t len, psm_tag = tag & (~ep_priv->domain->reserved_tag_bits); psm_tagsel = (~ignore) | ep_priv->domain->reserved_tag_bits; - if (ep_priv->recv_cq_event_flag && !(flags & FI_EVENT) && !context) { + if (ep_priv->recv_cq_event_flag && !(flags & FI_COMPLETION) && !context) { fi_context = &ep_priv->nocomp_recv_context; } else { @@ -90,7 +90,7 @@ ssize_t _psmx_tagged_recv(struct fid_ep *ep, void *buf, size_t len, fi_context = context; user_fi_context= 1; - PSMX_CTXT_TYPE(fi_context) = PSMX_RECV_CONTEXT; + PSMX_CTXT_TYPE(fi_context) = 
PSMX_TRECV_CONTEXT; PSMX_CTXT_USER(fi_context) = buf; PSMX_CTXT_EP(fi_context) = ep_priv; } @@ -125,7 +125,7 @@ ssize_t psmx_tagged_recv_no_flag_av_map(struct fid_ep *ep, void *buf, psm_tagsel = (~ignore) | ep_priv->domain->reserved_tag_bits; fi_context = context; - PSMX_CTXT_TYPE(fi_context) = PSMX_RECV_CONTEXT; + PSMX_CTXT_TYPE(fi_context) = PSMX_TRECV_CONTEXT; PSMX_CTXT_USER(fi_context) = buf; PSMX_CTXT_EP(fi_context) = ep_priv; @@ -157,7 +157,7 @@ ssize_t psmx_tagged_recv_no_flag_av_table(struct fid_ep *ep, void *buf, psm_tagsel = (~ignore) | ep_priv->domain->reserved_tag_bits; fi_context = context; - PSMX_CTXT_TYPE(fi_context) = PSMX_RECV_CONTEXT; + PSMX_CTXT_TYPE(fi_context) = PSMX_TRECV_CONTEXT; PSMX_CTXT_USER(fi_context) = buf; PSMX_CTXT_EP(fi_context) = ep_priv; @@ -422,7 +422,7 @@ ssize_t _psmx_tagged_send(struct fid_ep *ep, const void *buf, size_t len, PSMX_CTXT_TYPE(fi_context) = PSMX_INJECT_CONTEXT; PSMX_CTXT_EP(fi_context) = ep_priv; } - else if (ep_priv->send_cq_event_flag && !(flags & FI_EVENT) && !context) { + else if (ep_priv->send_cq_event_flag && !(flags & FI_COMPLETION) && !context) { fi_context = &ep_priv->nocomp_send_context; } else { @@ -432,7 +432,7 @@ ssize_t _psmx_tagged_send(struct fid_ep *ep, const void *buf, size_t len, fi_context = context; if (fi_context != &ep_priv->sendimm_context) { user_fi_context = 1; - PSMX_CTXT_TYPE(fi_context) = PSMX_SEND_CONTEXT; + PSMX_CTXT_TYPE(fi_context) = PSMX_TSEND_CONTEXT; PSMX_CTXT_USER(fi_context) = (void *)buf; PSMX_CTXT_EP(fi_context) = ep_priv; } @@ -468,7 +468,7 @@ ssize_t psmx_tagged_send_no_flag_av_map(struct fid_ep *ep, const void *buf, psm_tag = tag & (~ep_priv->domain->reserved_tag_bits); fi_context = context; - PSMX_CTXT_TYPE(fi_context) = PSMX_SEND_CONTEXT; + PSMX_CTXT_TYPE(fi_context) = PSMX_TSEND_CONTEXT; PSMX_CTXT_USER(fi_context) = (void *)buf; PSMX_CTXT_EP(fi_context) = ep_priv; @@ -507,7 +507,7 @@ ssize_t psmx_tagged_send_no_flag_av_table(struct fid_ep *ep, const void *buf, psm_tag 
= tag & (~ep_priv->domain->reserved_tag_bits); fi_context = context; - PSMX_CTXT_TYPE(fi_context) = PSMX_SEND_CONTEXT; + PSMX_CTXT_TYPE(fi_context) = PSMX_TSEND_CONTEXT; PSMX_CTXT_USER(fi_context) = (void *)buf; PSMX_CTXT_EP(fi_context) = ep_priv; diff --git a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_util.c b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_util.c index b39c17defc..15f1176763 100644 --- a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_util.c +++ b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_util.c @@ -74,11 +74,6 @@ int psmx_uuid_to_port(psm_uuid_t uuid) return (int)port; } -static void psmx_name_server_cleanup(void *args) -{ - close((int)(uintptr_t)args); -} - /************************************************************* * A simple name resolution mechanism for client-server style * applications. The server side has to run first. The client @@ -89,7 +84,7 @@ static void psmx_name_server_cleanup(void *args) *************************************************************/ void *psmx_name_server(void *args) { - struct psmx_fid_domain *domain; + struct psmx_fid_fabric *fabric; struct addrinfo hints = { .ai_flags = AI_PASSIVE, .ai_family = AF_UNSPEC, @@ -100,18 +95,17 @@ void *psmx_name_server(void *args) int listenfd = -1, connfd; int port; int n; + int ret; - domain = args; - port = domain->ns_port; - - pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); + fabric = args; + port = psmx_uuid_to_port(fabric->uuid); if (asprintf(&service, "%d", port) < 0) return NULL; n = getaddrinfo(NULL, service, &hints, &res); if (n < 0) { - PSMX_DEBUG("%s: port %d: %s\n", __func__, port, gai_strerror(n)); + PSMX_DEBUG("port %d: %s\n", port, gai_strerror(n)); free(service); return NULL; } @@ -132,28 +126,24 @@ void *psmx_name_server(void *args) free(service); if (listenfd < 0) { - PSMX_DEBUG("%s: couldn't listen to port %d\n", __func__, port); + PSMX_DEBUG("couldn't listen to port %d. 
try set OFI_PSM_UUID to a different value?\n", port); return NULL; } listen(listenfd, 256); - pthread_cleanup_push(psmx_name_server_cleanup, (void *)(uintptr_t)listenfd); - { - pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); - pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - - while (1) { - connfd = accept(listenfd, NULL, 0); - if (connfd >= 0) { - pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); - write(connfd, &domain->psm_epid, sizeof(psm_epid_t)); - close(connfd); - pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); + while (1) { + connfd = accept(listenfd, NULL, 0); + if (connfd >= 0) { + if (fabric->active_domain) { + ret = write(connfd, &fabric->active_domain->psm_epid, sizeof(psm_epid_t)); + if (ret != sizeof(psm_epid_t)) + PSMX_WARN("%s: error sending address info to the client\n", + __func__); } + close(connfd); } } - pthread_cleanup_pop(1); return NULL; } @@ -181,7 +171,7 @@ void *psmx_resolve_name(const char *servername, int port) n = getaddrinfo(servername, service, &hints, &res); if (n < 0) { - PSMX_DEBUG("%s:(%s:%d):%s\n", __func__, servername, port, gai_strerror(n)); + PSMX_DEBUG("(%s:%d):%s\n", servername, port, gai_strerror(n)); free(service); return NULL; } @@ -200,7 +190,7 @@ void *psmx_resolve_name(const char *servername, int port) free(service); if (sockfd < 0) { - PSMX_DEBUG("%s: couldn't connect to %s:%d\n", __func__, servername, port); + PSMX_DEBUG("couldn't connect to %s:%d\n", servername, port); return NULL; } diff --git a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_wait.c b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_wait.c index 1e53bfe5d8..f9d5348698 100644 --- a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_wait.c +++ b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_wait.c @@ -188,7 +188,8 @@ void psmx_wait_signal(struct fid_wait *wait) break; case FI_WAIT_FD: - write(wait_priv->fd[1], &c, 1); + if (write(wait_priv->fd[1], &c, 1) != 1) + PSMX_WARN("%s: error signaling wait 
object\n", __func__); break; case FI_WAIT_MUTEX_COND: @@ -276,8 +277,8 @@ int psmx_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr, break; default: - PSMX_DEBUG("%s: attr->wait_obj=%d, supported=%d,%d,%d\n", - __func__, attr->wait_obj, FI_WAIT_UNSPEC, + PSMX_DEBUG("attr->wait_obj=%d, supported=%d,%d,%d\n", + attr->wait_obj, FI_WAIT_UNSPEC, FI_WAIT_FD, FI_WAIT_MUTEX_COND); return -FI_EINVAL; } diff --git a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock.h b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock.h index ed7af56584..0aa143b376 100644 --- a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock.h +++ b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock.h @@ -94,23 +94,23 @@ #define SOCK_EP_MAX_CM_DATA_SZ (256) #define SOCK_EP_RDM_CAP (FI_MSG | FI_RMA | FI_TAGGED | FI_ATOMICS | \ - FI_DYNAMIC_MR | FI_NAMED_RX_CTX | FI_BUFFERED_RECV | \ - FI_DIRECTED_RECV | FI_INJECT | FI_MULTI_RECV | \ + FI_DYNAMIC_MR | FI_NAMED_RX_CTX | \ + FI_DIRECTED_RECV | FI_MULTI_RECV | \ FI_SOURCE | FI_READ | FI_WRITE | FI_RECV | FI_SEND | \ - FI_REMOTE_READ | FI_REMOTE_WRITE | FI_REMOTE_CQ_DATA | \ + FI_REMOTE_READ | FI_REMOTE_WRITE | \ FI_COMPLETION | FI_REMOTE_SIGNAL | FI_REMOTE_COMPLETE | \ FI_MORE | FI_CANCEL | FI_FENCE) #define SOCK_EP_MSG_CAP SOCK_EP_RDM_CAP #define SOCK_EP_DGRAM_CAP (FI_MSG | FI_TAGGED | FI_DYNAMIC_MR | \ - FI_NAMED_RX_CTX | FI_BUFFERED_RECV | FI_DIRECTED_RECV | \ - FI_INJECT | FI_MULTI_RECV | FI_SOURCE | FI_RECV | FI_SEND | \ - FI_REMOTE_CQ_DATA | FI_COMPLETION | FI_REMOTE_SIGNAL | \ + FI_NAMED_RX_CTX | FI_DIRECTED_RECV | \ + FI_MULTI_RECV | FI_SOURCE | FI_RECV | FI_SEND | \ + FI_COMPLETION | FI_REMOTE_SIGNAL | \ FI_REMOTE_COMPLETE | FI_MORE | FI_CANCEL | \ FI_FENCE) -#define SOCK_DEF_OPS (FI_SEND | FI_RECV | FI_BUFFERED_RECV) +#define SOCK_DEF_OPS (FI_SEND | FI_RECV ) #define SOCK_EP_MSG_ORDER (FI_ORDER_RAR | FI_ORDER_RAW | FI_ORDER_RAS| \ FI_ORDER_WAR | FI_ORDER_WAW | FI_ORDER_WAS | \ @@ -184,9 +184,11 @@ struct 
sock_cntr { struct dlist_entry rx_list; struct dlist_entry tx_list; + fastlock_t list_lock; struct fid_wait *waitset; int signal; + int is_waiting; }; struct sock_mr { @@ -377,6 +379,16 @@ struct sock_comp { struct sock_eq *eq; }; +struct sock_cm_entry { + int sock; + int do_listen; + int signal_fds[2]; + fastlock_t lock; + int shutdown_received; + pthread_t listener_thread; + struct dlist_entry msg_list; +}; + struct sock_ep { struct fid_ep ep; size_t fclass; @@ -416,24 +428,19 @@ struct sock_ep { enum fi_ep_type ep_type; struct sockaddr_in *src_addr; struct sockaddr_in *dest_addr; - fi_addr_t conn_addr; - uint16_t key; - int socket; - pthread_t listener_thread; - int do_listen; + struct sockaddr_in cm_addr; + fid_t peer_fid; + uint16_t key; + int is_disabled; + struct sock_cm_entry cm; }; struct sock_pep { struct fid_pep pep; struct sock_fabric *sock_fab; - int do_listen; - pthread_t listener_thread; - int signal_fds[2]; - int socket; - int listener_sock_fd; - + struct sock_cm_entry cm; struct sockaddr_in src_addr; struct fi_info info; struct sock_eq *eq; @@ -702,6 +709,7 @@ struct sock_cq { struct ringbuffd cq_rbfd; struct ringbuf cqerr_rb; fastlock_t lock; + fastlock_t list_lock; struct fid_wait *waitset; int signal; @@ -713,6 +721,13 @@ struct sock_cq { sock_cq_report_fn report_completion; }; +struct sock_cm_msg_list_entry { + size_t msg_len; + struct sockaddr_in addr; + struct dlist_entry entry; + char msg[0]; +}; + struct sock_conn_hdr { uint8_t type; uint8_t reserved[3]; @@ -797,6 +812,7 @@ int sock_msg_sep(struct fid_domain *domain, struct fi_info *info, int sock_msg_passive_ep(struct fid_fabric *fabric, struct fi_info *info, struct fid_pep **pep, void *context); int sock_ep_enable(struct fid_ep *ep); +int sock_ep_disable(struct fid_ep *ep); int sock_stx_ctx(struct fid_domain *domain, diff --git a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_atomic.c b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_atomic.c index 
ecd84957e2..3b71971dae 100644 --- a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_atomic.c +++ b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_atomic.c @@ -34,6 +34,7 @@ # include #endif /* HAVE_CONFIG_H */ +#include #include #include #include diff --git a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_cntr.c b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_cntr.c index 7e91f1a558..528b772544 100644 --- a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_cntr.c +++ b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_cntr.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include @@ -61,6 +60,7 @@ int sock_cntr_progress(struct sock_cntr *cntr) !sock_progress_thread_wait) return 0; + fastlock_acquire(&cntr->list_lock); for (entry = cntr->tx_list.next; entry != &cntr->tx_list; entry = entry->next) { tx_ctx = container_of(entry, struct sock_tx_ctx, cntr_entry); @@ -72,6 +72,8 @@ int sock_cntr_progress(struct sock_cntr *cntr) rx_ctx = container_of(entry, struct sock_rx_ctx, cntr_entry); sock_pe_progress_rx_ctx(cntr->domain->pe, rx_ctx); } + fastlock_release(&cntr->list_lock); + return 0; } @@ -85,18 +87,20 @@ static uint64_t sock_cntr_read(struct fid_cntr *cntr) int sock_cntr_inc(struct sock_cntr *cntr) { - fastlock_acquire(&cntr->mut); + pthread_mutex_lock(&cntr->mut); atomic_inc(&cntr->value); if (atomic_get(&cntr->value) >= atomic_get(&cntr->threshold)) pthread_cond_signal(&cntr->cond); - fastlock_release(&cntr->mut); + pthread_mutex_unlock(&cntr->mut); return 0; } int sock_cntr_err_inc(struct sock_cntr *cntr) { + pthread_mutex_lock(&cntr->mut); atomic_inc(&cntr->err_cnt); pthread_cond_signal(&cntr->cond); + pthread_mutex_unlock(&cntr->mut); return 0; } @@ -105,11 +109,11 @@ static int sock_cntr_add(struct fid_cntr *cntr, uint64_t value) struct sock_cntr *_cntr; _cntr = container_of(cntr, struct sock_cntr, cntr_fid); - fastlock_acquire(&_cntr->mut); + pthread_mutex_lock(&_cntr->mut); 
atomic_set(&_cntr->value, atomic_get(&_cntr->value) + value); if (atomic_get(&_cntr->value) >= atomic_get(&_cntr->threshold)) pthread_cond_signal(&_cntr->cond); - fastlock_release(&_cntr->mut); + pthread_mutex_unlock(&_cntr->mut); return 0; } @@ -118,46 +122,59 @@ static int sock_cntr_set(struct fid_cntr *cntr, uint64_t value) struct sock_cntr *_cntr; _cntr = container_of(cntr, struct sock_cntr, cntr_fid); - fastlock_acquire(&_cntr->mut); + pthread_mutex_lock(&_cntr->mut); atomic_set(&_cntr->value, value); if (atomic_get(&_cntr->value) >= atomic_get(&_cntr->threshold)) pthread_cond_signal(&_cntr->cond); - fastlock_release(&_cntr->mut); + pthread_mutex_unlock(&_cntr->mut); return 0; } static int sock_cntr_wait(struct fid_cntr *cntr, uint64_t threshold, int timeout) { int ret = 0; - struct timeval now; - double start_ms, end_ms; + uint64_t start_ms = 0, end_ms = 0; struct sock_cntr *_cntr; - + _cntr = container_of(cntr, struct sock_cntr, cntr_fid); - fastlock_acquire(&_cntr->mut); + pthread_mutex_lock(&_cntr->mut); + if (atomic_get(&_cntr->value) >= threshold) { + pthread_mutex_unlock(&_cntr->mut); + return 0; + } + + if (_cntr->is_waiting) { + pthread_mutex_unlock(&_cntr->mut); + return -FI_EBUSY; + } + + _cntr->is_waiting = 1; atomic_set(&_cntr->threshold, threshold); - while (atomic_get(&_cntr->value) < atomic_get(&_cntr->threshold) && !ret) { - if (_cntr->domain->progress_mode == FI_PROGRESS_MANUAL) { - if (timeout > 0) { - gettimeofday(&now, NULL); - start_ms = (double)now.tv_sec * 1000.0 + - (double)now.tv_usec / 1000.0; - } + + if (_cntr->domain->progress_mode == FI_PROGRESS_MANUAL) { + pthread_mutex_unlock(&_cntr->mut); + if (timeout >= 0) { + start_ms = fi_gettime_ms(); + end_ms = start_ms + timeout; + } + + while (atomic_get(&_cntr->value) < threshold) { sock_cntr_progress(_cntr); - if (timeout > 0) { - gettimeofday(&now, NULL); - end_ms = (double)now.tv_sec * 1000.0 + - (double)now.tv_usec / 1000.0; - timeout -= (end_ms - start_ms); - timeout = timeout < 0 
? 0 : timeout; + if (timeout >= 0 && fi_gettime_ms() >= end_ms) { + ret = -FI_ETIMEDOUT; + break; } } + pthread_mutex_lock(&_cntr->mut); + } else { ret = fi_wait_cond(&_cntr->cond, &_cntr->mut, timeout); } + + _cntr->is_waiting = 0; atomic_set(&_cntr->threshold, ~0); - fastlock_release(&_cntr->mut); + pthread_mutex_unlock(&_cntr->mut); return -ret; -} +} int sock_cntr_control(struct fid *fid, int command, void *arg) { @@ -214,7 +231,9 @@ static int sock_cntr_close(struct fid *fid) if (cntr->signal && cntr->attr.wait_obj == FI_WAIT_FD) sock_wait_close(&cntr->waitset->fid); - fastlock_destroy(&cntr->mut); + pthread_mutex_destroy(&cntr->mut); + fastlock_destroy(&cntr->list_lock); + pthread_cond_destroy(&cntr->cond); atomic_dec(&cntr->domain->ref); free(cntr); @@ -289,7 +308,7 @@ int sock_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, ret = pthread_cond_init(&_cntr->cond, NULL); if (ret) - goto err1; + goto err; if(attr == NULL) memcpy(&_cntr->attr, &sock_cntr_add, sizeof(sock_cntr_attr)); @@ -309,14 +328,18 @@ int sock_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, wait_attr.wait_obj = FI_WAIT_FD; ret = sock_wait_open(&dom->fab->fab_fid, &wait_attr, &_cntr->waitset); - if (ret) - goto err1; + if (ret) { + ret = FI_EINVAL; + goto err; + } _cntr->signal = 1; break; case FI_WAIT_SET: - if (!attr) - return -FI_EINVAL; + if (!attr) { + ret = FI_EINVAL; + goto err; + } _cntr->waitset = attr->wait_set; _cntr->signal = 1; @@ -325,14 +348,15 @@ int sock_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, dlist_init(&list_entry->entry); list_entry->fid = &_cntr->cntr_fid.fid; dlist_insert_after(&list_entry->entry, &wait->fid_list); - break; default: break; } - fastlock_init(&_cntr->mut); + pthread_mutex_init(&_cntr->mut, NULL); + fastlock_init(&_cntr->list_lock); + atomic_init(&_cntr->ref, 0); atomic_init(&_cntr->err_cnt, 0); @@ -352,7 +376,7 @@ int sock_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, *cntr = 
&_cntr->cntr_fid; return 0; -err1: +err: free(_cntr); return -ret; } diff --git a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_cq.c b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_cq.c index fb5fb59e07..28048c0a80 100644 --- a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_cq.c +++ b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_cq.c @@ -59,6 +59,7 @@ int sock_cq_progress(struct sock_cq *cq) !sock_progress_thread_wait) return 0; + fastlock_acquire(&cq->list_lock); for (entry = cq->tx_list.next; entry != &cq->tx_list; entry = entry->next) { tx_ctx = container_of(entry, struct sock_tx_ctx, cq_entry); @@ -70,6 +71,8 @@ int sock_cq_progress(struct sock_cq *cq) rx_ctx = container_of(entry, struct sock_rx_ctx, cq_entry); sock_pe_progress_rx_ctx(cq->domain->pe, rx_ctx); } + fastlock_release(&cq->list_lock); + return 0; } @@ -245,45 +248,39 @@ ssize_t sock_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count, { int ret = 0; int64_t threshold; - struct timeval now; struct sock_cq *sock_cq; - double start_ms, end_ms; + uint64_t start_ms = 0, end_ms = 0; ssize_t cq_entry_len, avail; sock_cq = container_of(cq, struct sock_cq, cq_fid); cq_entry_len = sock_cq->cq_entry_size; - if (sock_cq->domain->progress_mode == FI_PROGRESS_MANUAL) { - if (timeout > 0) { - gettimeofday(&now, NULL); - start_ms = (double)now.tv_sec * 1000.0 + - (double)now.tv_usec / 1000.0; - } - sock_cq_progress(sock_cq); - if (timeout > 0) { - gettimeofday(&now, NULL); - end_ms = (double)now.tv_sec * 1000.0 + - (double)now.tv_usec / 1000.0; - timeout -= (end_ms - start_ms); - timeout = timeout < 0 ? 
0 : timeout; - } - } else - sock_cq_progress(sock_cq); - if (sock_cq->attr.wait_cond == FI_CQ_COND_THRESHOLD) { threshold = MIN((int64_t)cond, count); }else{ threshold = count; } - fastlock_acquire(&sock_cq->lock); - if ((avail = rbfdused(&sock_cq->cq_rbfd))) - ret = sock_cq_rbuf_read(sock_cq, buf, - MIN(threshold, avail / cq_entry_len), - src_addr, cq_entry_len); - fastlock_release(&sock_cq->lock); + if (sock_cq->domain->progress_mode == FI_PROGRESS_MANUAL) { + if (timeout >= 0) { + start_ms = fi_gettime_ms(); + end_ms = start_ms + timeout; + } - if (ret == 0) { + do { + sock_cq_progress(sock_cq); + fastlock_acquire(&sock_cq->lock); + if ((avail = rbfdused(&sock_cq->cq_rbfd))) + ret = sock_cq_rbuf_read(sock_cq, buf, + MIN(threshold, avail / cq_entry_len), + src_addr, cq_entry_len); + fastlock_release(&sock_cq->lock); + if (ret == 0 && timeout >= 0) { + if (fi_gettime_ms() >= end_ms) + return -FI_ETIMEDOUT; + } + }while (ret == 0); + } else { ret = rbfdwait(&sock_cq->cq_rbfd, timeout); fastlock_acquire(&sock_cq->lock); if (ret != -FI_ETIMEDOUT && (avail = rbfdused(&sock_cq->cq_rbfd))) @@ -386,6 +383,7 @@ int sock_cq_close(struct fid *fid) rbfdfree(&cq->cq_rbfd); fastlock_destroy(&cq->lock); + fastlock_destroy(&cq->list_lock); atomic_dec(&cq->domain->ref); free(cq); @@ -550,14 +548,18 @@ int sock_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, wait_attr.wait_obj = FI_WAIT_MUTEX_COND; ret = sock_wait_open(&sock_dom->fab->fab_fid, &wait_attr, &sock_cq->waitset); - if (ret) - goto err3; + if (ret) { + ret = -FI_EINVAL; + goto err4; + } sock_cq->signal = 1; break; case FI_WAIT_SET: - if (!attr) - return -FI_EINVAL; + if (!attr) { + ret = -FI_EINVAL; + goto err4; + } sock_cq->waitset = attr->wait_set; sock_cq->signal = 1; @@ -567,14 +569,19 @@ int sock_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, list_entry->fid = &sock_cq->cq_fid.fid; dlist_insert_after(&list_entry->entry, &wait->fid_list); break; + default: break; } *cq = &sock_cq->cq_fid; 
atomic_inc(&sock_dom->ref); + fastlock_init(&sock_cq->list_lock); + return 0; - + +err4: + rbfree(&sock_cq->cqerr_rb); err3: rbfree(&sock_cq->addr_rb); err2: diff --git a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_dom.c b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_dom.c index 24522211f4..c2e5682176 100644 --- a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_dom.c +++ b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_dom.c @@ -34,6 +34,7 @@ # include #endif /* HAVE_CONFIG_H */ +#include #include #include @@ -247,13 +248,21 @@ struct sock_mr *sock_mr_verify_desc(struct sock_domain *domain, void *desc, return sock_mr_verify_key(domain, key, buf, len, access); } -static int sock_regattr(struct fid_domain *domain, const struct fi_mr_attr *attr, +static int sock_regattr(struct fid *fid, const struct fi_mr_attr *attr, uint64_t flags, struct fid_mr **mr) { struct fi_eq_entry eq_entry; struct sock_domain *dom; struct sock_mr *_mr; uint64_t key; + struct fid_domain *domain; + + if (fid->fclass != FI_CLASS_DOMAIN) { + SOCK_LOG_ERROR("memory registration only supported " + "for struct fid_domain\n"); + return -FI_EINVAL; + } + domain = container_of(fid, struct fid_domain, fid); dom = container_of(domain, struct sock_domain, dom_fid); if (!(dom->info.mode & FI_PROV_MR_ATTR) && @@ -306,7 +315,7 @@ err: return -errno; } -static int sock_regv(struct fid_domain *domain, const struct iovec *iov, +static int sock_regv(struct fid *fid, const struct iovec *iov, size_t count, uint64_t access, uint64_t offset, uint64_t requested_key, uint64_t flags, struct fid_mr **mr, void *context) @@ -319,10 +328,10 @@ static int sock_regv(struct fid_domain *domain, const struct iovec *iov, attr.offset = offset; attr.requested_key = requested_key; attr.context = context; - return sock_regattr(domain, &attr, flags, mr); + return sock_regattr(fid, &attr, flags, mr); } -static int sock_reg(struct fid_domain *domain, const void *buf, size_t len, +static 
int sock_reg(struct fid *fid, const void *buf, size_t len, uint64_t access, uint64_t offset, uint64_t requested_key, uint64_t flags, struct fid_mr **mr, void *context) { @@ -330,7 +339,7 @@ static int sock_reg(struct fid_domain *domain, const void *buf, size_t len, iov.iov_base = (void *) buf; iov.iov_len = len; - return sock_regv(domain, &iov, 1, access, offset, requested_key, + return sock_regv(fid, &iov, 1, access, offset, requested_key, flags, mr, context); } @@ -355,7 +364,7 @@ int sock_dom_bind(struct fid *fid, struct fid *bfid, uint64_t flags) int sock_endpoint(struct fid_domain *domain, struct fi_info *info, struct fid_ep **ep, void *context) { - switch (info->ep_type) { + switch (info->ep_attr->type) { case FI_EP_RDM: return sock_rdm_ep(domain, info, ep, context); case FI_EP_DGRAM: @@ -370,7 +379,7 @@ int sock_endpoint(struct fid_domain *domain, struct fi_info *info, int sock_scalable_ep(struct fid_domain *domain, struct fi_info *info, struct fid_ep **sep, void *context) { - switch (info->ep_type) { + switch (info->ep_attr->type) { case FI_EP_RDM: return sock_rdm_sep(domain, info, sep, context); case FI_EP_DGRAM: diff --git a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_ep.c b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_ep.c index e769b7815e..8061f78e87 100644 --- a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_ep.c +++ b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_ep.c @@ -34,6 +34,7 @@ # include #endif /* HAVE_CONFIG_H */ +#include #include #include @@ -142,7 +143,9 @@ static int sock_ctx_bind_cq(struct fid *fid, struct fid *bfid, uint64_t flags) tx_ctx->comp.write_cq_event = 1; } + fastlock_acquire(&sock_cq->list_lock); dlist_insert_tail(&tx_ctx->cq_entry, &sock_cq->tx_list); + fastlock_release(&sock_cq->list_lock); break; case FI_CLASS_RX_CTX: @@ -153,7 +156,9 @@ static int sock_ctx_bind_cq(struct fid *fid, struct fid *bfid, uint64_t flags) rx_ctx->comp.recv_cq_event = 1; } + 
fastlock_acquire(&sock_cq->list_lock); dlist_insert_tail(&rx_ctx->cq_entry, &sock_cq->rx_list); + fastlock_release(&sock_cq->list_lock); break; case FI_CLASS_STX_CTX: @@ -176,7 +181,9 @@ static int sock_ctx_bind_cq(struct fid *fid, struct fid *bfid, uint64_t flags) tx_ctx->comp.write_cq_event = 1; } + fastlock_acquire(&sock_cq->list_lock); dlist_insert_tail(&tx_ctx->cq_entry, &sock_cq->tx_list); + fastlock_release(&sock_cq->list_lock); break; default: @@ -205,8 +212,9 @@ static int sock_ctx_bind_cntr(struct fid *fid, struct fid *bfid, uint64_t flags) if (flags & FI_WRITE) tx_ctx->comp.write_cntr = cntr; + fastlock_acquire(&cntr->list_lock); dlist_insert_tail(&tx_ctx->cntr_entry, &cntr->tx_list); - + fastlock_release(&cntr->list_lock); break; case FI_CLASS_RX_CTX: @@ -220,7 +228,9 @@ static int sock_ctx_bind_cntr(struct fid *fid, struct fid *bfid, uint64_t flags) if (flags & FI_REMOTE_WRITE) rx_ctx->comp.rem_write_cntr = cntr; + fastlock_acquire(&cntr->list_lock); dlist_insert_tail(&rx_ctx->cntr_entry, &cntr->rx_list); + fastlock_release(&cntr->list_lock); break; case FI_CLASS_STX_CTX: @@ -234,7 +244,9 @@ static int sock_ctx_bind_cntr(struct fid *fid, struct fid *bfid, uint64_t flags) if (flags & FI_WRITE) tx_ctx->comp.write_cntr = cntr; + fastlock_acquire(&cntr->list_lock); dlist_insert_tail(&tx_ctx->cntr_entry, &cntr->tx_list); + fastlock_release(&cntr->list_lock); break; @@ -377,10 +389,17 @@ static int sock_ctx_getopt(fid_t fid, int level, int optname, switch (optname) { case FI_OPT_MIN_MULTI_RECV: + if (*optlen < sizeof(size_t)) + return -FI_ETOOSMALL; *(size_t *)optval = rx_ctx->min_multi_recv; *optlen = sizeof(size_t); break; - + case FI_OPT_CM_DATA_SIZE: + if (*optlen < sizeof(size_t)) + return -FI_ETOOSMALL; + *((size_t *) optval) = SOCK_EP_MAX_CM_DATA_SZ; + *optlen = sizeof(size_t); + break; default: return -FI_ENOPROTOOPT; } @@ -475,6 +494,7 @@ struct fi_ops_ep sock_ctx_ep_ops = { static int sock_ep_close(struct fid *fid) { struct sock_ep *sock_ep; + char 
c = 0; switch(fid->fclass) { case FI_CLASS_EP: @@ -510,6 +530,22 @@ static int sock_ep_close(struct fid *fid) free(sock_ep->src_addr); if (sock_ep->dest_addr) free(sock_ep->dest_addr); + + if (sock_ep->ep_type == FI_EP_MSG) { + + sock_ep->cm.do_listen = 0; + + if (write(sock_ep->cm.signal_fds[0], &c, 1) != 1) + SOCK_LOG_INFO("Failed to signal\n"); + + if (sock_ep->cm.listener_thread && + pthread_join(sock_ep->cm.listener_thread, NULL)) { + SOCK_LOG_INFO("pthread join failed\n"); + } + + close(sock_ep->cm.signal_fds[0]); + close(sock_ep->cm.signal_fds[1]); + } atomic_dec(&sock_ep->domain->ref); free(sock_ep); @@ -603,8 +639,10 @@ static int sock_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags) if (flags & FI_COMPLETION) ep->comp.recv_cq_event = 1; } - + + fastlock_acquire(&cq->list_lock); dlist_insert_tail(&rx_ctx->cq_entry, &cq->rx_list); + fastlock_release(&cq->list_lock); continue; } @@ -669,7 +707,9 @@ static int sock_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags) if (flags & FI_REMOTE_WRITE) rx_ctx->comp.rem_write_cntr = cntr; + fastlock_acquire(&cntr->list_lock); dlist_insert_tail(&rx_ctx->cntr_entry, &cntr->rx_list); + fastlock_release(&cntr->list_lock); continue; } @@ -832,6 +872,36 @@ int sock_ep_enable(struct fid_ep *ep) return 0; } +int sock_ep_disable(struct fid_ep *ep) +{ + int i; + struct sock_ep *sock_ep; + + sock_ep = container_of(ep, struct sock_ep, ep); + + if (sock_ep->tx_ctx && + sock_ep->tx_ctx->fid.ctx.fid.fclass == FI_CLASS_TX_CTX) { + sock_ep->tx_ctx->enabled = 0; + } + + if (sock_ep->rx_ctx && + sock_ep->rx_ctx->ctx.fid.fclass == FI_CLASS_RX_CTX) { + sock_ep->rx_ctx->enabled = 0; + } + + for (i = 0; i < sock_ep->ep_attr.tx_ctx_cnt; i++) { + if (sock_ep->tx_array[i]) + sock_ep->tx_array[i]->enabled = 0; + } + + for (i = 0; i < sock_ep->ep_attr.rx_ctx_cnt; i++) { + if (sock_ep->rx_array[i]) + sock_ep->rx_array[i]->enabled = 0; + } + sock_ep->is_disabled = 1; + return 0; +} + static int sock_ep_getopt(fid_t fid, int level, 
int optname, void *optval, size_t *optlen) { @@ -1055,14 +1125,13 @@ int sock_srx_ctx(struct fid_domain *domain, struct fi_info *sock_fi_info(enum fi_ep_type ep_type, struct fi_info *hints, void *src_addr, void *dest_addr) { - struct fi_info *_info = fi_allocinfo_internal(); + struct fi_info *_info = fi_allocinfo(); if (!_info) return NULL; _info->src_addr = calloc(1, sizeof(struct sockaddr_in)); _info->dest_addr = calloc(1, sizeof(struct sockaddr_in)); - _info->ep_type = ep_type; _info->mode = SOCK_MODE; _info->addr_format = FI_SOCKADDR_IN; _info->dest_addrlen =_info->src_addrlen = sizeof(struct sockaddr_in); @@ -1075,18 +1144,21 @@ struct fi_info *sock_fi_info(enum fi_ep_type ep_type, memcpy(_info->dest_addr, dest_addr, sizeof(struct sockaddr_in)); } - if (hints->caps) - _info->caps = hints->caps; + if (hints) { + if (hints->caps) + _info->caps = hints->caps; - if (hints->ep_attr) - *(_info->ep_attr) = *(hints->ep_attr); + if (hints->ep_attr) + *(_info->ep_attr) = *(hints->ep_attr); - if (hints->tx_attr) - *(_info->tx_attr) = *(hints->tx_attr); + if (hints->tx_attr) + *(_info->tx_attr) = *(hints->tx_attr); - if (hints->rx_attr) - *(_info->rx_attr) = *(hints->rx_attr); - + if (hints->rx_attr) + *(_info->rx_attr) = *(hints->rx_attr); + } + + _info->ep_attr->type = ep_type; *(_info->domain_attr) = sock_domain_attr; *(_info->fabric_attr) = sock_fabric_attr; @@ -1099,13 +1171,13 @@ struct fi_info *sock_fi_info(enum fi_ep_type ep_type, int sock_alloc_endpoint(struct fid_domain *domain, struct fi_info *info, struct sock_ep **ep, void *context, size_t fclass) -{ - int ret; +{ + int ret, flags; struct sock_ep *sock_ep; struct sock_tx_ctx *tx_ctx; struct sock_rx_ctx *rx_ctx; struct sock_domain *sock_dom; - + if (info) { ret = sock_verify_info(info); if (ret) { @@ -1114,11 +1186,7 @@ int sock_alloc_endpoint(struct fid_domain *domain, struct fi_info *info, } } - if (domain) - sock_dom = container_of(domain, struct sock_domain, dom_fid); - else - sock_dom = NULL; - + sock_dom 
= container_of(domain, struct sock_domain, dom_fid); sock_ep = (struct sock_ep*)calloc(1, sizeof(*sock_ep)); if (!sock_ep) return -FI_ENOMEM; @@ -1158,9 +1226,11 @@ int sock_alloc_endpoint(struct fid_domain *domain, struct fi_info *info, fastlock_release(&sock_dom->lock); if (info) { - sock_ep->ep_type = info->ep_type; + sock_ep->ep_type = info->ep_attr->type; sock_ep->info.caps = info->caps; sock_ep->info.addr_format = FI_SOCKADDR_IN; + sock_ep->ep_attr.tx_ctx_cnt = info->ep_attr->tx_ctx_cnt; + sock_ep->ep_attr.rx_ctx_cnt = info->ep_attr->rx_ctx_cnt; if (info->src_addr) { sock_ep->src_addr = calloc(1, sizeof(struct sockaddr_in)); @@ -1178,14 +1248,6 @@ int sock_alloc_endpoint(struct fid_domain *domain, struct fi_info *info, sizeof(struct sockaddr_in)); } - if (info->ep_attr) { - sock_ep->ep_attr = *info->ep_attr; - sock_ep->ep_attr.total_buffered_recv = - sock_ep->ep_attr.total_buffered_recv ? - sock_ep->ep_attr.total_buffered_recv : - SOCK_EP_MAX_BUFF_RECV; - } - if (info->tx_attr) { sock_ep->tx_attr = *info->tx_attr; sock_ep->op_flags = info->tx_attr->op_flags; @@ -1255,6 +1317,18 @@ int sock_alloc_endpoint(struct fid_domain *domain, struct fi_info *info, if (info) { memcpy(&sock_ep->info, info, sizeof(struct fi_info)); } + + if (sock_ep->ep_type == FI_EP_MSG) { + dlist_init(&sock_ep->cm.msg_list); + if (socketpair(AF_UNIX, SOCK_STREAM, 0, + sock_ep->cm.signal_fds) < 0) + goto err; + + flags = fcntl(sock_ep->cm.signal_fds[1], F_GETFL, 0); + if (fcntl(sock_ep->cm.signal_fds[1], F_SETFL, flags | O_NONBLOCK)) + SOCK_LOG_ERROR("fcntl failed"); + } + sock_ep->domain = sock_dom; atomic_inc(&sock_dom->ref); return 0; diff --git a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_ep_dgram.c b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_ep_dgram.c index 95462ea2a0..ccab87e899 100644 --- a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_ep_dgram.c +++ b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_ep_dgram.c @@ -56,10 +56,9 @@ 
#include "sock.h" const struct fi_ep_attr sock_dgram_ep_attr = { + .type = FI_EP_DGRAM, .protocol = FI_PROTO_SOCK_TCP, .max_msg_size = SOCK_EP_MAX_MSG_SZ, - .inject_size = SOCK_EP_MAX_INJECT_SZ, - .total_buffered_recv = SOCK_EP_MAX_BUFF_RECV, .max_order_raw_size = SOCK_EP_MAX_ORDER_RAW_SZ, .max_order_war_size = SOCK_EP_MAX_ORDER_WAR_SZ, .max_order_waw_size = SOCK_EP_MAX_ORDER_WAW_SZ, @@ -155,13 +154,6 @@ int sock_dgram_verify_ep_attr(struct fi_ep_attr *ep_attr, if (ep_attr->max_msg_size > sock_dgram_ep_attr.max_msg_size) return -FI_ENODATA; - if (ep_attr->inject_size > sock_dgram_ep_attr.inject_size) - return -FI_ENODATA; - - if (ep_attr->total_buffered_recv > - sock_dgram_ep_attr.total_buffered_recv) - return -FI_ENODATA; - if (ep_attr->max_order_raw_size > sock_dgram_ep_attr.max_order_raw_size) return -FI_ENODATA; @@ -223,13 +215,10 @@ int sock_dgram_getinfo(uint32_t version, const char *node, const char *service, char hostname[HOST_NAME_MAX]; if (!info) - return -FI_EBADFLAGS; + return -FI_EINVAL; *info = NULL; - if (!node && !service && !hints) - return -FI_EBADFLAGS; - if (version != FI_VERSION(SOCK_MAJOR_VERSION, SOCK_MINOR_VERSION)) return -FI_ENODATA; @@ -266,7 +255,7 @@ int sock_dgram_getinfo(uint32_t version, const char *node, const char *service, ret = getaddrinfo(node ? 
node : hostname, service, &sock_hints, &result_ptr); if (ret != 0) { - ret = FI_ENODATA; + ret = -FI_ENODATA; SOCK_LOG_INFO("getaddrinfo failed!\n"); goto err; } @@ -297,7 +286,7 @@ int sock_dgram_getinfo(uint32_t version, const char *node, const char *service, ret = getaddrinfo(node, service, &sock_hints, &result_ptr); if (ret != 0) { - ret = FI_ENODATA; + ret = -FI_ENODATA; SOCK_LOG_INFO("getaddrinfo failed!\n"); goto err; } @@ -333,7 +322,7 @@ int sock_dgram_getinfo(uint32_t version, const char *node, const char *service, result->ai_addrlen); if ( ret != 0) { SOCK_LOG_ERROR("Failed to create udp socket\n"); - ret = FI_ENODATA; + ret = -FI_ENODATA; goto err; } @@ -346,7 +335,7 @@ int sock_dgram_getinfo(uint32_t version, const char *node, const char *service, ret = getsockname(udp_sock, (struct sockaddr*)src_addr, &len); if (ret != 0) { SOCK_LOG_ERROR("getsockname failed\n"); - ret = FI_ENODATA; + ret = -FI_ENODATA; goto err; } close(udp_sock); @@ -356,14 +345,12 @@ int sock_dgram_getinfo(uint32_t version, const char *node, const char *service, } if (hints && hints->src_addr) { - if (!src_addr) { - src_addr = calloc(1, sizeof(struct sockaddr_in)); - if (!src_addr) { - ret = -FI_ENOMEM; - goto err; - } + if(hints->src_addrlen != sizeof(struct sockaddr_in)){ + SOCK_LOG_ERROR("Sockets provider requires src_addrlen to be sizeof(struct sockaddr_in); got %zu\n", + hints->src_addrlen); + ret = -FI_ENODATA; + goto err; } - assert(hints->src_addrlen == sizeof(struct sockaddr_in)); memcpy(src_addr, hints->src_addr, hints->src_addrlen); } @@ -375,7 +362,12 @@ int sock_dgram_getinfo(uint32_t version, const char *node, const char *service, goto err; } } - assert(hints->dest_addrlen == sizeof(struct sockaddr_in)); + if(hints->dest_addrlen != sizeof(struct sockaddr_in)){ + SOCK_LOG_ERROR("Sockets provider requires dest_addrlen to be sizeof(struct sockaddr_in); got %zu\n", + hints->dest_addrlen); + ret = -FI_ENODATA; + goto err; + } memcpy(dest_addr, hints->dest_addr, 
hints->dest_addrlen); } @@ -393,7 +385,7 @@ int sock_dgram_getinfo(uint32_t version, const char *node, const char *service, _info = sock_dgram_fi_info(hints, src_addr, dest_addr); if (!_info) { - ret = FI_ENOMEM; + ret = -FI_ENOMEM; goto err; } diff --git a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_ep_msg.c b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_ep_msg.c index 651a4718b2..c9d266ffeb 100644 --- a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_ep_msg.c +++ b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_ep_msg.c @@ -34,6 +34,7 @@ # include #endif /* HAVE_CONFIG_H */ +#include #include #include #include @@ -57,10 +58,9 @@ #include "sock_util.h" const struct fi_ep_attr sock_msg_ep_attr = { + .type = FI_EP_MSG, .protocol = FI_PROTO_SOCK_TCP, .max_msg_size = SOCK_EP_MAX_MSG_SZ, - .inject_size = SOCK_EP_MAX_INJECT_SZ, - .total_buffered_recv = SOCK_EP_MAX_BUFF_RECV, .max_order_raw_size = SOCK_EP_MAX_ORDER_RAW_SZ, .max_order_war_size = SOCK_EP_MAX_ORDER_WAR_SZ, .max_order_waw_size = SOCK_EP_MAX_ORDER_WAW_SZ, @@ -156,13 +156,6 @@ int sock_msg_verify_ep_attr(struct fi_ep_attr *ep_attr, if (ep_attr->max_msg_size > sock_msg_ep_attr.max_msg_size) return -FI_ENODATA; - if (ep_attr->inject_size > sock_msg_ep_attr.inject_size) - return -FI_ENODATA; - - if (ep_attr->total_buffered_recv > - sock_msg_ep_attr.total_buffered_recv) - return -FI_ENODATA; - if (ep_attr->max_order_raw_size > sock_msg_ep_attr.max_order_raw_size) return -FI_ENODATA; @@ -224,13 +217,10 @@ int sock_msg_getinfo(uint32_t version, const char *node, const char *service, char hostname[HOST_NAME_MAX]; if (!info) - return -FI_EBADFLAGS; + return -FI_EINVAL; *info = NULL; - if (!node && !service && !hints) - return -FI_EBADFLAGS; - if (version != FI_VERSION(SOCK_MAJOR_VERSION, SOCK_MINOR_VERSION)) return -FI_ENODATA; @@ -267,7 +257,7 @@ int sock_msg_getinfo(uint32_t version, const char *node, const char *service, ret = getaddrinfo(node ? 
node : hostname, service, &sock_hints, &result_ptr); if (ret != 0) { - ret = FI_ENODATA; + ret = -FI_ENODATA; SOCK_LOG_INFO("getaddrinfo failed!\n"); goto err; } @@ -298,7 +288,7 @@ int sock_msg_getinfo(uint32_t version, const char *node, const char *service, ret = getaddrinfo(node, service, &sock_hints, &result_ptr); if (ret != 0) { - ret = FI_ENODATA; + ret = -FI_ENODATA; SOCK_LOG_INFO("getaddrinfo failed!\n"); goto err; } @@ -334,7 +324,7 @@ int sock_msg_getinfo(uint32_t version, const char *node, const char *service, result->ai_addrlen); if ( ret != 0) { SOCK_LOG_ERROR("Failed to create udp socket\n"); - ret = FI_ENODATA; + ret = -FI_ENODATA; goto err; } @@ -347,7 +337,7 @@ int sock_msg_getinfo(uint32_t version, const char *node, const char *service, ret = getsockname(udp_sock, (struct sockaddr*)src_addr, &len); if (ret != 0) { SOCK_LOG_ERROR("getsockname failed\n"); - ret = FI_ENODATA; + ret = -FI_ENODATA; goto err; } close(udp_sock); @@ -357,14 +347,12 @@ int sock_msg_getinfo(uint32_t version, const char *node, const char *service, } if (hints && hints->src_addr) { - if (!src_addr) { - src_addr = calloc(1, sizeof(struct sockaddr_in)); - if (!src_addr) { - ret = -FI_ENOMEM; - goto err; - } + if (hints->src_addrlen != sizeof(struct sockaddr_in)) { + SOCK_LOG_ERROR("Sockets provider requires src_addrlen to be sizeof(struct sockaddr_in); got %zu\n", + hints->src_addrlen); + ret = -FI_ENODATA; + goto err; } - assert(hints->src_addrlen == sizeof(struct sockaddr_in)); memcpy(src_addr, hints->src_addr, hints->src_addrlen); } @@ -376,7 +364,12 @@ int sock_msg_getinfo(uint32_t version, const char *node, const char *service, goto err; } } - assert(hints->dest_addrlen == sizeof(struct sockaddr_in)); + if (hints->dest_addrlen != sizeof(struct sockaddr_in)) { + SOCK_LOG_ERROR("Sockets provider requires dest_addrlen to be sizeof(struct sockaddr_in); got %zu\n", + hints->dest_addrlen); + ret = -FI_ENODATA; + goto err; + } memcpy(dest_addr, hints->dest_addr, 
hints->dest_addrlen); } @@ -394,7 +387,7 @@ int sock_msg_getinfo(uint32_t version, const char *node, const char *service, _info = sock_msg_fi_info(hints, src_addr, dest_addr); if (!_info) { - ret = FI_ENOMEM; + ret = -FI_ENOMEM; goto err; } @@ -434,6 +427,7 @@ static int sock_ep_cm_getname(fid_t fid, void *addr, size_t *addrlen) switch(fid->fclass) { case FI_CLASS_EP: + case FI_CLASS_SEP: sock_ep = container_of(fid, struct sock_ep, ep.fid); memcpy(addr, sock_ep->src_addr, *addrlen); break; @@ -479,9 +473,43 @@ static int sock_ep_cm_create_socket() return sock; } +static int sock_ep_cm_enqueue_msg(struct sock_cm_entry *cm, + const struct sockaddr_in *addr, + void *msg, size_t len) +{ + char c = 0; + int ret = 0; + struct sock_cm_msg_list_entry *list_entry; -static int sock_ep_cm_send_msg(int sock_fd, - const struct sockaddr_in *addr, void *msg, size_t len) + fastlock_acquire(&cm->lock); + list_entry = calloc(1, sizeof(struct sock_cm_msg_list_entry) + len); + if (!list_entry) { + ret = -FI_ENOMEM; + goto out; + } + + list_entry->msg_len = len; + memcpy(&list_entry->msg, msg, len); + memcpy(&list_entry->addr, addr, sizeof(struct sockaddr_in)); + dlist_insert_tail(&list_entry->entry, &cm->msg_list); + ret = write(cm->signal_fds[0], &c, 1); + if (ret != 1) { + SOCK_LOG_INFO("failed to signal\n"); + ret = -FI_EIO; + } else { + ret = 0; + SOCK_LOG_INFO("Enqueued CM Msg\n"); + } +out: + fastlock_release(&cm->lock); + return ret; +} + + + +static int sock_ep_cm_send_msg(struct sock_cm_entry *cm, + const struct sockaddr_in *addr, + void *msg, size_t len) { int ret, retry = 0; unsigned char response = 0; @@ -493,21 +521,21 @@ static int sock_ep_cm_send_msg(int sock_fd, SOCK_LOG_INFO("Sending message to %s:%d\n", sa_ip, ntohs(addr->sin_port)); - while (retry < SOCK_EP_MAX_RETRY) { - ret = sendto(sock_fd, (char *)msg, len, 0, + while (retry < SOCK_EP_MAX_RETRY && (volatile int)cm->do_listen) { + ret = sendto(cm->sock, (char *)msg, len, 0, (struct sockaddr *) addr, sizeof 
*addr); SOCK_LOG_INFO("Total Sent: %d\n", ret); if (ret < 0) return -1; - ret = fi_poll_fd(sock_fd, SOCK_CM_COMM_TIMEOUT); + ret = fi_poll_fd(cm->sock, SOCK_CM_COMM_TIMEOUT); retry++; if (ret <= 0) { continue; } addr_len = sizeof(struct sockaddr_in); - ret = recvfrom(sock_fd, &response, sizeof(response), 0, + ret = recvfrom(cm->sock, &response, sizeof(response), 0, (struct sockaddr *) &from_addr, &addr_len); SOCK_LOG_INFO("Received ACK: %d\n", ret); if (ret == sizeof(response)) @@ -516,13 +544,14 @@ static int sock_ep_cm_send_msg(int sock_fd, return -1; } -static int sock_ep_cm_send_ack(int sock_fd, struct sockaddr_in *addr) +static int sock_ep_cm_send_ack(struct sock_cm_entry *cm, struct sockaddr_in *addr) { int ack_sent = 0, retry = 0, ret; - unsigned char response; + unsigned char response = 0; - while(!ack_sent && retry < SOCK_EP_MAX_RETRY) { - ret = sendto(sock_fd, &response, sizeof(response), 0, + while(!ack_sent && retry < SOCK_EP_MAX_RETRY && + (volatile int) cm->do_listen) { + ret = sendto(cm->sock, &response, sizeof(response), 0, (struct sockaddr *) addr, sizeof *addr); retry++; @@ -539,36 +568,85 @@ static int sock_ep_cm_send_ack(int sock_fd, struct sockaddr_in *addr) return ack_sent; } +static void sock_ep_cm_flush_msg(struct sock_cm_entry *cm) +{ + struct dlist_entry *entry; + struct sock_cm_msg_list_entry *msg_entry; + + fastlock_acquire(&cm->lock); + while (!dlist_empty(&cm->msg_list)) { + + SOCK_LOG_INFO("Flushing CM message\n"); + entry = cm->msg_list.next; + msg_entry = container_of(entry, + struct sock_cm_msg_list_entry, entry); + if (sock_ep_cm_send_msg(cm, &msg_entry->addr, + &msg_entry->msg, msg_entry->msg_len)) + SOCK_LOG_INFO("Failed to send out cm message\n"); + dlist_remove(entry); + free(msg_entry); + } + fastlock_release(&cm->lock); +} + + static void *sock_msg_ep_listener_thread (void *data) { + struct pollfd poll_fds[2]; struct sock_ep *ep = data; struct sock_conn_response *conn_response; struct fi_eq_cm_entry *cm_entry; - struct 
fi_eq_err_entry cm_err_entry; + struct fi_eq_err_entry *cm_err_entry; struct sockaddr_in from_addr; socklen_t addr_len; int ret, user_data_sz, entry_sz; struct fid_ep *fid_ep; struct sock_ep *sock_ep; + char tmp = 0; + + ep->cm.sock = sock_ep_cm_create_socket(); + if (!ep->cm.sock) { + SOCK_LOG_ERROR("Cannot open socket\n"); + return NULL; + } SOCK_LOG_INFO("Starting listener thread for EP: %p\n", ep); - conn_response = malloc(sizeof(*conn_response) + SOCK_EP_MAX_CM_DATA_SZ); - cm_entry = malloc(sizeof(*cm_entry) + SOCK_EP_MAX_CM_DATA_SZ); - if (!conn_response || !cm_entry) { + conn_response = calloc(1, sizeof(*conn_response) + SOCK_EP_MAX_CM_DATA_SZ); + if (!conn_response) { SOCK_LOG_ERROR("cannot allocate\n"); return NULL; } - ep->do_listen = 1; + cm_entry = calloc(1, sizeof(*cm_entry) + SOCK_EP_MAX_CM_DATA_SZ); + if (!cm_entry) { + free (conn_response); + SOCK_LOG_ERROR("cannot allocate\n"); + return NULL; + } + + ep->cm.do_listen = 1; + poll_fds[0].fd = ep->cm.sock; + poll_fds[1].fd = ep->cm.signal_fds[1]; + poll_fds[0].events = poll_fds[1].events = POLLIN; + while((volatile int)ep->cm.do_listen) { + + if (poll(poll_fds, 2, -1) > 0) { + if (poll_fds[1].revents & POLLIN) { + ret = read(ep->cm.signal_fds[1], &tmp, 1); + if (ret != 1) { + SOCK_LOG_INFO("Invalid signal\n"); + break; + } + sock_ep_cm_flush_msg(&ep->cm); + continue; + } + } else { + break; + } - while((volatile int)ep->do_listen) { - ret = fi_poll_fd(ep->socket, -1); - if (ret <= 0) - continue; - addr_len = sizeof(struct sockaddr_in); - ret = recvfrom(ep->socket, (char*)conn_response, + ret = recvfrom(ep->cm.sock, (char*)conn_response, sizeof(*conn_response) + SOCK_EP_MAX_CM_DATA_SZ, 0, (struct sockaddr *) &from_addr, &addr_len); if (ret <= 0) @@ -577,7 +655,7 @@ static void *sock_msg_ep_listener_thread (void *data) SOCK_LOG_INFO("Total received: %d\n", ret); if (ret < sizeof(*conn_response) || - !sock_ep_cm_send_ack(ep->socket, &from_addr)) + !sock_ep_cm_send_ack(&ep->cm, &from_addr)) continue; 
user_data_sz = ret - sizeof(*conn_response); @@ -585,6 +663,7 @@ static void *sock_msg_ep_listener_thread (void *data) switch (conn_response->hdr.type) { case SOCK_CONN_ACCEPT: SOCK_LOG_INFO("Received SOCK_CONN_ACCEPT\n"); + entry_sz = sizeof(*cm_entry) + user_data_sz; memset(cm_entry, 0, sizeof *cm_entry); cm_entry->fid = conn_response->hdr.c_fid; @@ -595,6 +674,12 @@ static void *sock_msg_ep_listener_thread (void *data) fid_ep = container_of(conn_response->hdr.c_fid, struct fid_ep, fid); sock_ep = container_of(fid_ep, struct sock_ep, ep); + + if (sock_ep->is_disabled || + sock_ep->cm.shutdown_received) + break; + + sock_ep->peer_fid = conn_response->hdr.s_fid; sock_ep->connected = 1; ((struct sockaddr_in*)sock_ep->dest_addr)->sin_port = @@ -608,16 +693,56 @@ static void *sock_msg_ep_listener_thread (void *data) case SOCK_CONN_REJECT: SOCK_LOG_INFO("Received SOCK_CONN_REJECT\n"); - memset(&cm_err_entry, 0, sizeof cm_err_entry); - cm_err_entry.fid = conn_response->hdr.c_fid; - cm_err_entry.err = -FI_ECONNREFUSED; + + fid_ep = container_of(conn_response->hdr.c_fid, + struct fid_ep, fid); + sock_ep = container_of(fid_ep, struct sock_ep, ep); - /* TODO: Fix reporting err_data */ - //cm_err_entry->err_data = &conn_response->user_data; + if (sock_ep->is_disabled || + sock_ep->cm.shutdown_received) + break; - if (sock_eq_report_event(ep->eq, FI_ECONNREFUSED, - &cm_err_entry, - sizeof cm_err_entry, 0)) + cm_err_entry = calloc(1, sizeof(*cm_err_entry) + user_data_sz); + if (!cm_err_entry) { + SOCK_LOG_ERROR("cannot allocate memory\n"); + goto out; + } + + memset(cm_err_entry, 0, sizeof(*cm_err_entry) + user_data_sz); + cm_err_entry->fid = conn_response->hdr.c_fid; + cm_err_entry->err = -FI_ECONNREFUSED; + + if (user_data_sz > 0) + memcpy(cm_err_entry->err_data, + &conn_response->user_data, user_data_sz); + + if (sock_eq_report_event(sock_ep->eq, FI_ECONNREFUSED, + cm_err_entry, + sizeof(*cm_err_entry) + + user_data_sz, 0)) + SOCK_LOG_ERROR("Error in writing to EQ\n"); + 
free(cm_err_entry); + goto out; + + case SOCK_CONN_SHUTDOWN: + SOCK_LOG_INFO("Received SOCK_CONN_SHUTDOWN\n"); + + entry_sz = sizeof(*cm_entry) + user_data_sz; + memset(cm_entry, 0, sizeof *cm_entry); + cm_entry->fid = conn_response->hdr.c_fid; + + memcpy(&cm_entry->data, &conn_response->user_data, + user_data_sz); + + fid_ep = container_of(conn_response->hdr.c_fid, + struct fid_ep, fid); + sock_ep = container_of(fid_ep, struct sock_ep, ep); + if (sock_ep->cm.shutdown_received) + break; + + sock_ep->cm.shutdown_received = 1; + if (sock_eq_report_event(ep->eq, FI_SHUTDOWN, cm_entry, + entry_sz, 0)) SOCK_LOG_ERROR("Error in writing to EQ\n"); goto out; @@ -630,8 +755,8 @@ static void *sock_msg_ep_listener_thread (void *data) out: free(conn_response); free(cm_entry); - close(ep->socket); - ep->socket = 0; + close(ep->cm.sock); + ep->cm.listener_thread = 0L; return NULL; } @@ -641,13 +766,14 @@ static int sock_ep_cm_connect(struct fid_ep *ep, const void *addr, struct sock_conn_req *req; struct sock_ep *_ep; struct sock_eq *_eq; + int ret = 0; + _ep = container_of(ep, struct sock_ep, ep); _eq = _ep->eq; if (!_eq || paramlen > SOCK_EP_MAX_CM_DATA_SZ) return -FI_EINVAL; - req = (struct sock_conn_req*)calloc(1, - sizeof(*req) + paramlen); + req = (struct sock_conn_req*)calloc(1, sizeof(*req) + paramlen); if (!req) return -FI_ENOMEM; @@ -661,37 +787,30 @@ static int sock_ep_cm_connect(struct fid_ep *ep, const void *addr, req->ep_id = _ep->ep_id; req->hdr.c_fid = &ep->fid; req->hdr.s_fid = 0; - memcpy(&req->info, &_ep->info, sizeof(struct fi_info)); + req->info = _ep->info; memcpy(&req->src_addr, _ep->src_addr, sizeof(struct sockaddr_in)); memcpy(&req->dest_addr, _ep->info.dest_addr, sizeof(struct sockaddr_in)); - memcpy(&req->tx_attr, _ep->info.tx_attr, sizeof(struct fi_tx_attr)); - memcpy(&req->rx_attr, _ep->info.rx_attr, sizeof(struct fi_rx_attr)); - memcpy(&req->ep_attr, _ep->info.ep_attr, sizeof(struct fi_ep_attr)); - memcpy(&req->domain_attr, _ep->info.domain_attr, 
sizeof(struct fi_domain_attr)); - memcpy(&req->fabric_attr, _ep->info.fabric_attr, sizeof(struct fi_fabric_attr)); + req->tx_attr = *_ep->info.tx_attr; + req->rx_attr = *_ep->info.rx_attr; + req->ep_attr = *_ep->info.ep_attr; + req->domain_attr = *_ep->info.domain_attr; + req->fabric_attr = *_ep->info.fabric_attr; if (param && paramlen) memcpy(&req->user_data, param, paramlen); - if (!_ep->socket) { - _ep->socket = sock_ep_cm_create_socket(); - if (!_ep->socket) { - free (req); - return -FI_EIO; - } - } - - if (sock_ep_cm_send_msg(_ep->socket, addr, req, sizeof (*req) + paramlen)) - return -FI_EIO; - - if (pthread_create(&_ep->listener_thread, NULL, - sock_msg_ep_listener_thread, (void *)_ep)) { - SOCK_LOG_ERROR("Couldn't create listener thread\n"); - free (req); - return -FI_EINVAL; + memcpy(&_ep->cm_addr, addr, sizeof(struct sockaddr_in)); + if (sock_ep_cm_enqueue_msg(&_ep->cm, addr, req, + sizeof (*req) + paramlen)) { + ret = -FI_EIO; + goto err; } free (req); return 0; + +err: + free(req); + return ret; } static int sock_ep_cm_accept(struct fid_ep *ep, const void *param, size_t paramlen) @@ -702,13 +821,16 @@ static int sock_ep_cm_accept(struct fid_ep *ep, const void *param, size_t paraml struct sockaddr_in *addr; struct sock_ep *_ep; struct sock_eq *_eq; - int ret; + int ret = 0; _ep = container_of(ep, struct sock_ep, ep); _eq = _ep->eq; if (!_eq || paramlen > SOCK_EP_MAX_CM_DATA_SZ) return -FI_EINVAL; + if (_ep->is_disabled || _ep->cm.shutdown_received) + return -FI_EINVAL; + response = (struct sock_conn_response*)calloc(1, sizeof(*response) + paramlen); if (!response) @@ -717,6 +839,7 @@ static int sock_ep_cm_accept(struct fid_ep *ep, const void *param, size_t paraml req = (struct sock_conn_req *)_ep->info.connreq; if (!req) { SOCK_LOG_ERROR("invalid connreq for cm_accept\n"); + free(response); return -FI_EINVAL; } @@ -725,20 +848,16 @@ static int sock_ep_cm_accept(struct fid_ep *ep, const void *param, size_t paraml memcpy(&response->user_data, param, 
paramlen); addr = &req->from_addr; + memcpy(&_ep->cm_addr, addr, sizeof(struct sockaddr_in)); + _ep->peer_fid = req->hdr.c_fid; + _ep->rem_ep_id = req->ep_id; response->hdr.type = SOCK_CONN_ACCEPT; response->hdr.s_fid = &ep->fid; response->hdr.s_port = htons(atoi(_ep->domain->service)); - _ep->socket = sock_ep_cm_create_socket(); - if (!_ep->socket) { - ret = -FI_EIO; - goto out; - } - - if (sock_ep_cm_send_msg(_ep->socket, addr, response, - sizeof (*response) + paramlen)) { - close(_ep->socket); + if (sock_ep_cm_enqueue_msg(&_ep->cm, addr, response, + sizeof (*response) + paramlen)) { ret = -FI_EIO; goto out; } @@ -756,6 +875,27 @@ out: return ret; } +int sock_ep_cm_shutdown(struct fid_ep *ep, uint64_t flags) +{ + struct sock_conn_response response; + struct sock_ep *_ep; + + _ep = container_of(ep, struct sock_ep, ep); + memset(&response, 0, sizeof(response)); + + response.hdr.c_fid = _ep->peer_fid; + response.hdr.s_fid = &ep->fid; + response.hdr.type = SOCK_CONN_SHUTDOWN; + + if (sock_ep_cm_enqueue_msg(&_ep->cm, &_ep->cm_addr, &response, + sizeof response)) { + return -FI_EIO; + } + + sock_ep_disable(ep); + return 0; +} + struct fi_ops_cm sock_ep_cm_ops = { .size = sizeof(struct fi_ops_cm), .getname = sock_ep_cm_getname, @@ -764,7 +904,7 @@ struct fi_ops_cm sock_ep_cm_ops = { .listen = fi_no_listen, .accept = sock_ep_cm_accept, .reject = fi_no_reject, - .shutdown = fi_no_shutdown, + .shutdown = sock_ep_cm_shutdown, }; int sock_msg_endpoint(struct fid_domain *domain, struct fi_info *info, @@ -819,7 +959,13 @@ int sock_msg_ep(struct fid_domain *domain, struct fi_info *info, ret = sock_msg_endpoint(domain, info, &endpoint, context, FI_CLASS_EP); if (ret) return ret; - + + if (pthread_create(&endpoint->cm.listener_thread, NULL, + sock_msg_ep_listener_thread, (void *)endpoint)) { + SOCK_LOG_ERROR("Couldn't create listener thread\n"); + return -FI_EINVAL; + } + *ep = &endpoint->ep; return 0; } @@ -850,12 +996,19 @@ static int sock_pep_fi_close(fid_t fid) struct 
sock_pep *pep; pep = container_of(fid, struct sock_pep, pep.fid); - pep->do_listen = 0; - ret = write(pep->signal_fds[0], &c, 1); + pep->cm.do_listen = 0; + ret = write(pep->cm.signal_fds[0], &c, 1); if (ret != 1) - SOCK_LOG_ERROR("Failed to signal\n"); + SOCK_LOG_INFO("Failed to signal\n"); + + if (pep->cm.listener_thread && + pthread_join(pep->cm.listener_thread, NULL)) { + SOCK_LOG_INFO("pthread join failed\n"); + } + + close(pep->cm.signal_fds[0]); + close(pep->cm.signal_fds[1]); - pthread_join(pep->listener_thread, NULL); free(pep); return 0; } @@ -894,35 +1047,43 @@ static void *sock_pep_listener_thread (void *data) { struct sock_pep *pep = (struct sock_pep *)data; struct sock_conn_req *conn_req = NULL; + struct sock_conn_response *conn_response = NULL; struct fi_eq_cm_entry *cm_entry; struct sockaddr_in from_addr; struct pollfd poll_fds[2]; + struct fid_ep *fid_ep; + struct sock_ep *sock_ep; socklen_t addr_len; - int ret, user_data_sz, entry_sz, tmp; + int ret, user_data_sz, entry_sz; + char tmp = 0; SOCK_LOG_INFO("Starting listener thread for PEP: %p\n", pep); - cm_entry = malloc(sizeof(*cm_entry) + SOCK_EP_MAX_CM_DATA_SZ); + cm_entry = calloc(1, sizeof(*cm_entry) + SOCK_EP_MAX_CM_DATA_SZ); if (!cm_entry) { SOCK_LOG_ERROR("cannot allocate\n"); return NULL; } - poll_fds[0].fd = pep->socket; - poll_fds[1].fd = pep->signal_fds[1]; + poll_fds[0].fd = pep->cm.sock; + poll_fds[1].fd = pep->cm.signal_fds[1]; poll_fds[0].events = poll_fds[1].events = POLLIN; - while((volatile int)pep->do_listen) { + while((volatile int)pep->cm.do_listen) { if (poll(poll_fds, 2, -1) > 0) { if (poll_fds[1].revents & POLLIN) { - read(pep->signal_fds[1], &tmp, 1); + ret = read(pep->cm.signal_fds[1], &tmp, 1); + if (ret != 1) + SOCK_LOG_INFO("Invalid signal\n"); + sock_ep_cm_flush_msg(&pep->cm); continue; } } else { break; } - + + if (conn_req == NULL) { - conn_req = malloc(sizeof(*conn_req) + SOCK_EP_MAX_CM_DATA_SZ); + conn_req = calloc(1, sizeof(*conn_req) + SOCK_EP_MAX_CM_DATA_SZ); 
if (!conn_req) { SOCK_LOG_ERROR("cannot allocate\n"); break; @@ -930,23 +1091,25 @@ static void *sock_pep_listener_thread (void *data) } addr_len = sizeof(struct sockaddr_in); - ret = recvfrom(pep->socket, (char*)conn_req, + ret = recvfrom(pep->cm.sock, (char*)conn_req, sizeof(*conn_req) + SOCK_EP_MAX_CM_DATA_SZ, 0, (struct sockaddr *) &from_addr, &addr_len); if (ret <= 0) continue; memcpy(&conn_req->from_addr, &from_addr, sizeof(struct sockaddr_in)); - SOCK_LOG_INFO("Msg received: %d\n", ret); - user_data_sz = ret - sizeof(*conn_req); - entry_sz = sizeof(*cm_entry) + user_data_sz; - + SOCK_LOG_INFO("CM msg received: %d\n", ret); memset(cm_entry, 0, sizeof *cm_entry); - if (conn_req->hdr.type == SOCK_CONN_REQ) { + switch (conn_req->hdr.type) { + case SOCK_CONN_REQ: SOCK_LOG_INFO("Received SOCK_CONN_REQ\n"); + + user_data_sz = ret - sizeof(*conn_req); + entry_sz = sizeof(*cm_entry) + user_data_sz; + if (ret < sizeof(*conn_req) || - !sock_ep_cm_send_ack(pep->socket, &from_addr)) { + !sock_ep_cm_send_ack(&pep->cm, &from_addr)) { SOCK_LOG_ERROR("Invalid connection request\n"); break; } @@ -961,16 +1124,37 @@ static void *sock_pep_listener_thread (void *data) if (sock_eq_report_event(pep->eq, FI_CONNREQ, cm_entry, entry_sz, 0)) SOCK_LOG_ERROR("Error in writing to EQ\n"); - } else { + break; + + case SOCK_CONN_SHUTDOWN: + SOCK_LOG_INFO("Received SOCK_CONN_SHUTDOWN\n"); + conn_response = (struct sock_conn_response*)conn_req; + + entry_sz = sizeof(*cm_entry); + cm_entry->fid = conn_response->hdr.c_fid; + fid_ep = container_of(conn_response->hdr.c_fid, + struct fid_ep, fid); + sock_ep = container_of(fid_ep, struct sock_ep, ep); + if (sock_ep->cm.shutdown_received) + break; + + sock_ep->cm.shutdown_received = 1; + if (sock_eq_report_event(sock_ep->eq, FI_SHUTDOWN, cm_entry, + entry_sz, 0)) + SOCK_LOG_ERROR("Error in writing to EQ\n"); + break; + + default: SOCK_LOG_ERROR("Invalid event\n"); + goto out; } } +out: if (conn_req) free(conn_req); free(cm_entry); - 
close(pep->socket); - pep->socket = 0; + close(pep->cm.sock); return NULL; } @@ -984,7 +1168,7 @@ static int sock_pep_create_listener_thread(struct sock_pep *pep) char sa_ip[INET_ADDRSTRLEN] = {0}; char sa_port[NI_MAXSERV] = {0}; - pep->do_listen = 1; + pep->cm.do_listen = 1; memset(&hints, 0, sizeof(hints)); hints.ai_family = AF_INET; @@ -1003,33 +1187,33 @@ static int sock_pep_create_listener_thread(struct sock_pep *pep) } for (p=s_res; p; p=p->ai_next) { - pep->socket = socket(p->ai_family, p->ai_socktype, + pep->cm.sock = socket(p->ai_family, p->ai_socktype, p->ai_protocol); - if (pep->socket >= 0) { + if (pep->cm.sock >= 0) { optval = 1; - if (setsockopt(pep->socket, SOL_SOCKET, SO_REUSEADDR, &optval, + if (setsockopt(pep->cm.sock, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof optval)) SOCK_LOG_ERROR("setsockopt failed\n"); - if (!bind(pep->socket, s_res->ai_addr, s_res->ai_addrlen)) + if (!bind(pep->cm.sock, s_res->ai_addr, s_res->ai_addrlen)) break; - close(pep->socket); - pep->socket = -1; + close(pep->cm.sock); + pep->cm.sock = -1; } } freeaddrinfo(s_res); - if (pep->socket < 0) + if (pep->cm.sock < 0) return -FI_EIO; optval = 1; - if (setsockopt(pep->socket, SOL_SOCKET, SO_REUSEADDR, &optval, + if (setsockopt(pep->cm.sock, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof optval)) SOCK_LOG_ERROR("setsockopt failed\n"); if (pep->src_addr.sin_port == 0) { addr_size = sizeof(addr); - if (getsockname(pep->socket, (struct sockaddr*)&addr, &addr_size)) + if (getsockname(pep->cm.sock, (struct sockaddr*)&addr, &addr_size)) return -FI_EINVAL; pep->src_addr.sin_port = addr.sin_port; } @@ -1037,7 +1221,7 @@ static int sock_pep_create_listener_thread(struct sock_pep *pep) SOCK_LOG_INFO("Listener thread bound to %s:%d\n", sa_ip, ntohs(pep->src_addr.sin_port)); - if (pthread_create(&pep->listener_thread, NULL, + if (pthread_create(&pep->cm.listener_thread, NULL, sock_pep_listener_thread, (void *)pep)) { SOCK_LOG_ERROR("Couldn't create listener thread\n"); return -FI_EINVAL; @@ 
-1058,15 +1242,10 @@ static int sock_pep_reject(struct fid_pep *pep, fi_connreq_t connreq, struct sock_conn_req *req; struct sockaddr_in *addr; struct sock_pep *_pep; - struct sock_eq *_eq; struct sock_conn_response *response; int ret = 0; _pep = container_of(pep, struct sock_pep, pep); - _eq = _pep->eq; - if (!_eq || paramlen > SOCK_EP_MAX_CM_DATA_SZ) - return -FI_EINVAL; - req = (struct sock_conn_req *)connreq; if (!req) return 0; @@ -1084,15 +1263,7 @@ static int sock_pep_reject(struct fid_pep *pep, fi_connreq_t connreq, response->hdr.type = SOCK_CONN_REJECT; response->hdr.s_fid = NULL; - if (!_pep->socket) { - _pep->socket = sock_ep_cm_create_socket(); - if (!_pep->socket) { - ret = -FI_EIO; - goto out; - } - } - - if (sock_ep_cm_send_msg(_pep->socket, addr, req, + if (sock_ep_cm_enqueue_msg(&_pep->cm, addr, req, sizeof(struct sock_conn_response))) { ret = -FI_EIO; goto out; @@ -1133,7 +1304,7 @@ int sock_msg_sep(struct fid_domain *domain, struct fi_info *info, int sock_msg_passive_ep(struct fid_fabric *fabric, struct fi_info *info, struct fid_pep **pep, void *context) { - int ret, flags; + int ret = -FI_EINVAL, flags; struct sock_pep *_pep; char hostname[HOST_NAME_MAX]; struct addrinfo sock_hints; @@ -1178,12 +1349,13 @@ int sock_msg_passive_ep(struct fid_fabric *fabric, struct fi_info *info, goto err; } - if(socketpair(AF_UNIX, SOCK_STREAM, 0, _pep->signal_fds) < 0) + if(socketpair(AF_UNIX, SOCK_STREAM, 0, _pep->cm.signal_fds) < 0) goto err; - flags = fcntl(_pep->signal_fds[1], F_GETFL, 0); - if (fcntl(_pep->signal_fds[1], F_SETFL, flags | O_NONBLOCK)) + flags = fcntl(_pep->cm.signal_fds[1], F_GETFL, 0); + if (fcntl(_pep->cm.signal_fds[1], F_SETFL, flags | O_NONBLOCK)) SOCK_LOG_ERROR("fcntl failed"); + dlist_init(&_pep->cm.msg_list); _pep->pep.fid.fclass = FI_CLASS_PEP; _pep->pep.fid.context = context; diff --git a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_ep_rdm.c b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_ep_rdm.c index 
346184d32e..dc1fe5e008 100644 --- a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_ep_rdm.c +++ b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_ep_rdm.c @@ -57,10 +57,9 @@ #include "sock_util.h" const struct fi_ep_attr sock_rdm_ep_attr = { + .type = FI_EP_RDM, .protocol = FI_PROTO_SOCK_TCP, .max_msg_size = SOCK_EP_MAX_MSG_SZ, - .inject_size = SOCK_EP_MAX_INJECT_SZ, - .total_buffered_recv = SOCK_EP_MAX_BUFF_RECV, .max_order_raw_size = SOCK_EP_MAX_ORDER_RAW_SZ, .max_order_war_size = SOCK_EP_MAX_ORDER_WAR_SZ, .max_order_waw_size = SOCK_EP_MAX_ORDER_WAW_SZ, @@ -93,23 +92,35 @@ static int sock_rdm_verify_rx_attr(const struct fi_rx_attr *attr) if (!attr) return 0; - if ((attr->caps | SOCK_EP_RDM_CAP) != SOCK_EP_RDM_CAP) + if ((attr->caps | SOCK_EP_RDM_CAP) != SOCK_EP_RDM_CAP) { + SOCK_LOG_INFO("Unsupported RDM rx caps\n"); return -FI_ENODATA; + } - if ((attr->op_flags | SOCK_EP_RDM_CAP) != SOCK_EP_RDM_CAP) + if ((attr->op_flags | SOCK_EP_RDM_CAP) != SOCK_EP_RDM_CAP) { + SOCK_LOG_INFO("Unsupported rx op_flags\n"); return -FI_ENODATA; + } - if ((attr->msg_order | SOCK_EP_MSG_ORDER) != SOCK_EP_MSG_ORDER) + if ((attr->msg_order | SOCK_EP_MSG_ORDER) != SOCK_EP_MSG_ORDER) { + SOCK_LOG_INFO("Unsuported rx message order\n"); return -FI_ENODATA; + } - if (attr->total_buffered_recv > sock_rdm_rx_attr.total_buffered_recv) + if (attr->total_buffered_recv > sock_rdm_rx_attr.total_buffered_recv) { + SOCK_LOG_INFO("Buffered receive size too large\n"); return -FI_ENODATA; + } - if (attr->size > sock_rdm_rx_attr.size) + if (attr->size > sock_rdm_rx_attr.size) { + SOCK_LOG_INFO("Rx size too large\n"); return -FI_ENODATA; + } - if (attr->iov_limit > sock_rdm_rx_attr.iov_limit) + if (attr->iov_limit > sock_rdm_rx_attr.iov_limit) { + SOCK_LOG_INFO("Rx iov limit too large\n"); return -FI_ENODATA; + } return 0; } @@ -119,23 +130,35 @@ static int sock_rdm_verify_tx_attr(const struct fi_tx_attr *attr) if (!attr) return 0; - if ((attr->caps | SOCK_EP_RDM_CAP) != 
SOCK_EP_RDM_CAP) + if ((attr->caps | SOCK_EP_RDM_CAP) != SOCK_EP_RDM_CAP) { + SOCK_LOG_INFO("Unsupported RDM tx caps\n"); return -FI_ENODATA; + } - if ((attr->op_flags | SOCK_EP_RDM_CAP) != SOCK_EP_RDM_CAP) + if ((attr->op_flags | SOCK_EP_RDM_CAP) != SOCK_EP_RDM_CAP) { + SOCK_LOG_INFO("Unsupported rx op_flags\n"); return -FI_ENODATA; + } - if ((attr->msg_order | SOCK_EP_MSG_ORDER) != SOCK_EP_MSG_ORDER) + if ((attr->msg_order | SOCK_EP_MSG_ORDER) != SOCK_EP_MSG_ORDER) { + SOCK_LOG_INFO("Unsupported tx message order\n"); return -FI_ENODATA; + } - if (attr->inject_size > sock_rdm_tx_attr.inject_size) + if (attr->inject_size > sock_rdm_tx_attr.inject_size) { + SOCK_LOG_INFO("Inject size too large\n"); return -FI_ENODATA; + } - if (attr->size > sock_rdm_tx_attr.size) + if (attr->size > sock_rdm_tx_attr.size) { + SOCK_LOG_INFO("Tx size too large\n"); return -FI_ENODATA; + } - if (attr->iov_limit > sock_rdm_tx_attr.iov_limit) + if (attr->iov_limit > sock_rdm_tx_attr.iov_limit) { + SOCK_LOG_INFO("Tx iov limit too large\n"); return -FI_ENODATA; + } return 0; } @@ -144,39 +167,45 @@ int sock_rdm_verify_ep_attr(struct fi_ep_attr *ep_attr, struct fi_tx_attr *tx_attr, struct fi_rx_attr *rx_attr) { + int ret; + if (ep_attr) { switch (ep_attr->protocol) { case FI_PROTO_UNSPEC: case FI_PROTO_SOCK_TCP: break; default: + SOCK_LOG_INFO("Unsupported protocol\n"); return -FI_ENODATA; } - if (ep_attr->max_msg_size > sock_rdm_ep_attr.max_msg_size) - return -FI_ENODATA; - - if (ep_attr->inject_size > sock_rdm_ep_attr.inject_size) - return -FI_ENODATA; - - if (ep_attr->total_buffered_recv > - sock_rdm_ep_attr.total_buffered_recv) + if (ep_attr->max_msg_size > sock_rdm_ep_attr.max_msg_size) { + SOCK_LOG_INFO("Message size too large\n"); return -FI_ENODATA; + } if (ep_attr->max_order_raw_size > - sock_rdm_ep_attr.max_order_raw_size) + sock_rdm_ep_attr.max_order_raw_size) { + SOCK_LOG_INFO("RAW order size too large\n"); return -FI_ENODATA; + } if (ep_attr->max_order_war_size > - 
sock_rdm_ep_attr.max_order_war_size) + sock_rdm_ep_attr.max_order_war_size) { + SOCK_LOG_INFO("WAR order size too large\n"); return -FI_ENODATA; + } if (ep_attr->max_order_waw_size > - sock_rdm_ep_attr.max_order_waw_size) + sock_rdm_ep_attr.max_order_waw_size) { + SOCK_LOG_INFO("WAW order size too large\n"); return -FI_ENODATA; + } - if ((ep_attr->msg_order | SOCK_EP_MSG_ORDER) != SOCK_EP_MSG_ORDER) + if ((ep_attr->msg_order | SOCK_EP_MSG_ORDER) != SOCK_EP_MSG_ORDER) { + SOCK_LOG_INFO("Unsupported message ordering\n"); return -FI_ENODATA; + } if ((ep_attr->tx_ctx_cnt > SOCK_EP_MAX_TX_CNT) && ep_attr->tx_ctx_cnt != FI_SHARED_CONTEXT) @@ -187,8 +216,13 @@ int sock_rdm_verify_ep_attr(struct fi_ep_attr *ep_attr, return -FI_ENODATA; } - if (sock_rdm_verify_tx_attr(tx_attr) || sock_rdm_verify_rx_attr(rx_attr)) - return -FI_ENODATA; + ret = sock_rdm_verify_tx_attr(tx_attr); + if (ret) + return ret; + + ret = sock_rdm_verify_rx_attr(rx_attr); + if (ret) + return ret; return 0; } @@ -225,17 +259,19 @@ int sock_rdm_getinfo(uint32_t version, const char *node, const char *service, char hostname[HOST_NAME_MAX]; if (!info) - return -FI_EBADFLAGS; + return -FI_EINVAL; *info = NULL; if (version != FI_VERSION(SOCK_MAJOR_VERSION, - SOCK_MINOR_VERSION)) + SOCK_MINOR_VERSION)) { + SOCK_LOG_INFO("Unsupported version\n"); return -FI_ENODATA; + } if (hints) { if ((SOCK_EP_RDM_CAP | hints->caps) != SOCK_EP_RDM_CAP) { - SOCK_LOG_INFO("Cannot support requested options!\n"); + SOCK_LOG_INFO("Unsupported capabilities\n"); return -FI_ENODATA; } @@ -264,7 +300,7 @@ int sock_rdm_getinfo(uint32_t version, const char *node, const char *service, ret = getaddrinfo(node ? 
node : hostname, service, &sock_hints, &result_ptr); if (ret != 0) { - ret = FI_ENODATA; + ret = -FI_ENODATA; SOCK_LOG_INFO("getaddrinfo failed!\n"); goto err; } @@ -295,7 +331,7 @@ int sock_rdm_getinfo(uint32_t version, const char *node, const char *service, ret = getaddrinfo(node, service, &sock_hints, &result_ptr); if (ret != 0) { - ret = FI_ENODATA; + ret = -FI_ENODATA; SOCK_LOG_INFO("getaddrinfo failed!\n"); goto err; } @@ -331,7 +367,7 @@ int sock_rdm_getinfo(uint32_t version, const char *node, const char *service, result->ai_addrlen); if ( ret != 0) { SOCK_LOG_ERROR("Failed to create udp socket\n"); - ret = FI_ENODATA; + ret = -FI_ENODATA; goto err; } @@ -344,7 +380,7 @@ int sock_rdm_getinfo(uint32_t version, const char *node, const char *service, ret = getsockname(udp_sock, (struct sockaddr*)src_addr, &len); if (ret != 0) { SOCK_LOG_ERROR("getsockname failed\n"); - ret = FI_ENODATA; + ret = -FI_ENODATA; goto err; } close(udp_sock); @@ -354,14 +390,12 @@ int sock_rdm_getinfo(uint32_t version, const char *node, const char *service, } if (hints && hints->src_addr) { - if (!src_addr) { - src_addr = calloc(1, sizeof(struct sockaddr_in)); - if (!src_addr) { - ret = -FI_ENOMEM; - goto err; - } + if(hints->src_addrlen != sizeof(struct sockaddr_in)){ + SOCK_LOG_ERROR("Sockets provider requires src_addrlen to be sizeof(struct sockaddr_in); got %zu\n", + hints->src_addrlen); + ret = -FI_ENODATA; + goto err; } - assert(hints->src_addrlen == sizeof(struct sockaddr_in)); memcpy(src_addr, hints->src_addr, hints->src_addrlen); } @@ -373,7 +407,12 @@ int sock_rdm_getinfo(uint32_t version, const char *node, const char *service, goto err; } } - assert(hints->dest_addrlen == sizeof(struct sockaddr_in)); + if(hints->dest_addrlen != sizeof(struct sockaddr_in)){ + SOCK_LOG_ERROR("Sockets provider requires dest_addrlen to be sizeof(struct sockaddr_in); got %zu\n", + hints->dest_addrlen); + ret = -FI_ENODATA; + goto err; + } memcpy(dest_addr, hints->dest_addr, hints->dest_addrlen); 
} @@ -391,7 +430,7 @@ int sock_rdm_getinfo(uint32_t version, const char *node, const char *service, _info = sock_rdm_fi_info(hints, src_addr, dest_addr); if (!_info) { - ret = FI_ENOMEM; + ret = -FI_ENOMEM; goto err; } diff --git a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_eq.c b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_eq.c index 16cafb6dfd..f4d4632a6e 100644 --- a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_eq.c +++ b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_eq.c @@ -62,6 +62,10 @@ ssize_t sock_eq_sread(struct fid_eq *eq, uint32_t *event, void *buf, size_t len, } if(dlistfd_empty(&sock_eq->list)) { + if(!timeout) { + SOCK_LOG_INFO("Nothing to read from eq!\n"); + return 0; + } ret = dlistfd_wait_avail(&sock_eq->list, timeout); if(ret <= 0) return ret; @@ -132,6 +136,7 @@ out: ssize_t sock_eq_report_event(struct sock_eq *sock_eq, uint32_t event, const void *buf, size_t len, uint64_t flags) { + int ret; struct sock_eq_entry *entry = calloc(1, len + sizeof(struct sock_eq_entry)); if(!entry) @@ -142,6 +147,7 @@ ssize_t sock_eq_report_event(struct sock_eq *sock_eq, uint32_t event, entry->type = event; entry->len = len; entry->flags = flags; + ret = entry->len; memcpy(entry->event, buf, len); dlistfd_insert_tail(&entry->entry, &sock_eq->list); @@ -149,7 +155,7 @@ ssize_t sock_eq_report_event(struct sock_eq *sock_eq, uint32_t event, sock_wait_signal(sock_eq->waitset); fastlock_release(&sock_eq->lock); - return 0; + return ret; } ssize_t sock_eq_report_error(struct sock_eq *sock_eq, fid_t fid, void *context, @@ -168,7 +174,7 @@ ssize_t sock_eq_report_error(struct sock_eq *sock_eq, fid_t fid, void *context, err_entry->context = context; err_entry->err = err; err_entry->prov_errno = prov_errno; - err_entry->err_data = err_data; + err_entry->err_data = err_data; entry->len = sizeof(struct fi_eq_err_entry); dlistfd_insert_tail(&entry->entry, &sock_eq->err_list); @@ -414,8 +420,10 @@ int sock_eq_open(struct 
fid_fabric *fabric, struct fi_eq_attr *attr, break; case FI_WAIT_SET: - if (!attr) - return -FI_EINVAL; + if (!attr) { + ret = -FI_EINVAL; + goto err2; + } sock_eq->waitset = attr->wait_set; sock_eq->signal = 1; diff --git a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_fabric.c b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_fabric.c index ae87207901..7b925c75af 100644 --- a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_fabric.c +++ b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_fabric.c @@ -75,11 +75,14 @@ int sock_verify_fabric_attr(struct fi_fabric_attr *attr) int sock_verify_info(struct fi_info *hints) { + enum fi_ep_type ep_type; int ret; + if (!hints) return 0; - switch (hints->ep_type) { + ep_type = hints->ep_attr ? hints->ep_attr->type : FI_EP_UNSPEC; + switch (ep_type) { case FI_EP_UNSPEC: case FI_EP_MSG: ret = sock_msg_verify_ep_attr(hints->ep_attr, @@ -182,8 +185,8 @@ static int sock_getinfo(uint32_t version, const char *node, const char *service, if (ret) return ret; - if (hints) { - switch (hints->ep_type) { + if (hints && hints->ep_attr) { + switch (hints->ep_attr->type) { case FI_EP_RDM: return sock_rdm_getinfo(version, node, service, flags, hints, info); diff --git a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_msg.c b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_msg.c index 253e52df4f..29b8bbe0c9 100644 --- a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_msg.c +++ b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_msg.c @@ -34,6 +34,7 @@ # include #endif /* HAVE_CONFIG_H */ +#include #include #include #include diff --git a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_progress.c b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_progress.c index 5ec963b9c8..5adc9f41b7 100644 --- a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_progress.c +++ b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_progress.c @@ -34,6 
+34,7 @@ # include #endif /* HAVE_CONFIG_H */ +#include #include #include #include @@ -130,6 +131,7 @@ static void sock_pe_release_entry(struct sock_pe *pe, pe_entry->data_len = 0; pe_entry->buf = 0; pe_entry->flags = 0; + pe_entry->context = 0L; dlist_remove(&pe_entry->entry); dlist_insert_head(&pe_entry->entry, &pe->free_list); @@ -410,8 +412,13 @@ static void sock_pe_send_response(struct sock_pe *pe, response->msg_hdr.op_type = op_type; response->msg_hdr.msg_len = htonll(response->msg_hdr.msg_len); response->msg_hdr.rx_id = pe_entry->msg_hdr.rx_id; - response->msg_hdr.ep_id = htons(sock_av_lookup_ep_id(rx_ctx->av, - pe_entry->addr)); + + if (pe_entry->ep && pe_entry->ep->connected) + response->msg_hdr.ep_id = pe_entry->ep->rem_ep_id; + else + response->msg_hdr.ep_id = + sock_av_lookup_ep_id(rx_ctx->av, pe_entry->addr); + response->msg_hdr.ep_id = htons(response->msg_hdr.ep_id); pe->pe_atomic = NULL; pe_entry->done_len = 0; @@ -671,7 +678,8 @@ static int sock_pe_process_rx_write(struct sock_pe *pe, struct sock_rx_ctx *rx_c sock_pe_report_error(pe_entry, rem); goto out; } else { - if (pe_entry->flags & FI_REMOTE_SIGNAL) { + if (pe_entry->flags & FI_REMOTE_SIGNAL || + pe_entry->flags & FI_REMOTE_CQ_DATA) { sock_pe_report_rx_completion(pe_entry); } } @@ -1088,7 +1096,8 @@ static int sock_pe_process_rx_atomic(struct sock_pe *pe, struct sock_rx_ctx *rx_ pe_entry->buf = pe_entry->pe.rx.rx_iov[0].iov.addr; pe_entry->data_len = offset; - if (pe_entry->flags & FI_REMOTE_SIGNAL) { + if (pe_entry->flags & FI_REMOTE_SIGNAL || + pe_entry->flags & FI_REMOTE_CQ_DATA) { sock_pe_report_rx_completion(pe_entry); } @@ -1468,11 +1477,11 @@ static int sock_pe_read_hdr(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx, } msg_hdr->msg_len = ntohll(msg_hdr->msg_len); - msg_hdr->rx_id = msg_hdr->rx_id; msg_hdr->flags = ntohll(msg_hdr->flags); msg_hdr->pe_entry_id = ntohs(msg_hdr->pe_entry_id); msg_hdr->ep_id = ntohs(msg_hdr->ep_id); pe_entry->pe.rx.header_read = 1; + pe_entry->flags = 
msg_hdr->flags; SOCK_LOG_INFO("PE RX (Hdr read): MsgLen: %" PRIu64 ", TX-ID: %d, Type: %d\n", msg_hdr->msg_len, msg_hdr->rx_id, msg_hdr->op_type); @@ -1551,6 +1560,7 @@ static int sock_pe_progress_tx_atomic(struct sock_pe *pe, SOCK_LOG_INFO("Send complete\n"); } sock_comm_flush(pe_entry->conn); + pe_entry->msg_hdr.flags = pe_entry->flags; return 0; } @@ -1609,6 +1619,7 @@ static int sock_pe_progress_tx_write(struct sock_pe *pe, SOCK_LOG_INFO("Send complete\n"); } sock_comm_flush(pe_entry->conn); + pe_entry->msg_hdr.flags = pe_entry->flags; return 0; } @@ -1644,6 +1655,7 @@ static int sock_pe_progress_tx_read(struct sock_pe *pe, SOCK_LOG_INFO("Send complete\n"); } sock_comm_flush(pe_entry->conn); + pe_entry->msg_hdr.flags = pe_entry->flags; return 0; } @@ -1691,6 +1703,7 @@ static int sock_pe_progress_tx_send(struct sock_pe *pe, } sock_comm_flush(pe_entry->conn); + pe_entry->msg_hdr.flags = pe_entry->flags; if (pe_entry->done_len == pe_entry->total_len) { pe_entry->pe.tx.send_done = 1; pe_entry->conn->tx_pe_entry = NULL; @@ -2111,7 +2124,8 @@ int sock_pe_progress_rx_ctx(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx) struct dlist_entry *entry; struct sock_pe_entry *pe_entry; - fastlock_acquire(&pe->lock); + if (fastlock_acquire(&pe->lock)) + return 0; /* progress buffered recvs */ fastlock_acquire(&rx_ctx->lock); @@ -2160,7 +2174,8 @@ int sock_pe_progress_tx_ctx(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx) struct dlist_entry *entry; struct sock_pe_entry *pe_entry; - fastlock_acquire(&pe->lock); + if (fastlock_acquire(&pe->lock)) + return 0; /* check tx_ctx rbuf */ fastlock_acquire(&tx_ctx->rlock); diff --git a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_rma.c b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_rma.c index c5d7d88678..bd3f8d326e 100644 --- a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_rma.c +++ b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_rma.c @@ -34,6 +34,7 @@ # include #endif /* HAVE_CONFIG_H 
*/ +#include #include #include #include diff --git a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_wait.c b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_wait.c index d46af7c8d0..3ad13f4451 100644 --- a/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_wait.c +++ b/opal/mca/common/libfabric/libfabric/prov/sockets/src/sock_wait.c @@ -170,12 +170,15 @@ void sock_wait_signal(struct fid_wait *wait_fid) { struct sock_wait *wait; static char c = 'a'; + int ret; wait = container_of(wait_fid, struct sock_wait, wait_fid); switch (wait->type) { case FI_WAIT_FD: - write(wait->wobj.fd[WAIT_WRITE_FD], &c, 1); + ret = write(wait->wobj.fd[WAIT_WRITE_FD], &c, 1); + if (ret != 1) + SOCK_LOG_ERROR("failed to signal\n"); break; case FI_WAIT_MUTEX_COND: diff --git a/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf.h b/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf.h index ef19fd11b3..eba56a167b 100644 --- a/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf.h +++ b/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf.h @@ -396,7 +396,7 @@ int usdf_av_open(struct fid_domain *domain, struct fi_av_attr *attr, /* fi_ops_mr */ -int usdf_reg_mr(struct fid_domain *domain, const void *buf, size_t len, +int usdf_reg_mr(struct fid *fid, const void *buf, size_t len, uint64_t access, uint64_t offset, uint64_t requested_key, uint64_t flags, struct fid_mr **mr_o, void *context); diff --git a/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_av.c b/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_av.c index 9e6e1b57f4..7a8b4e7626 100644 --- a/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_av.c +++ b/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_av.c @@ -113,6 +113,8 @@ usdf_post_insert_request_error(struct usdf_av_insert *insert, err_entry.context = insert->avi_context; err_entry.data = req - (struct usdf_av_req *)(insert + 1); err_entry.err = -req->avr_status; + err_entry.err_data = NULL; + 
err_entry.err_data_size = 0; usdf_eq_write_internal(av->av_eq, 0, &err_entry, sizeof(err_entry), diff --git a/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_cm.c b/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_cm.c index 7622da1261..7e0cb0165b 100644 --- a/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_cm.c +++ b/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_cm.c @@ -234,6 +234,7 @@ usdf_cm_msg_connreq_failed(struct usdf_connreq *crp, int error) err.err = -error; err.prov_errno = 0; err.err_data = NULL; + err.err_data_size = 0; usdf_eq_write_internal(eq, 0, &err, sizeof(err), USDF_EVENT_FLAG_ERROR); usdf_cm_msg_connreq_cleanup(crp); diff --git a/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_endpoint.c b/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_endpoint.c index 8afcfafc2b..37c0b396a2 100644 --- a/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_endpoint.c +++ b/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_endpoint.c @@ -87,7 +87,7 @@ int usdf_endpoint_open(struct fid_domain *domain, struct fi_info *info, struct fid_ep **ep_o, void *context) { - switch (info->ep_type) { + switch (info->ep_attr->type) { case FI_EP_DGRAM: return usdf_ep_dgram_open(domain, info, ep_o, context); case FI_EP_MSG: diff --git a/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_ep_msg.c b/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_ep_msg.c index 0a4d41859b..d15385c114 100644 --- a/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_ep_msg.c +++ b/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_ep_msg.c @@ -84,6 +84,7 @@ usdf_tx_msg_enable(struct usdf_tx *tx) return -FI_ENOCQ; } + USDF_INFO("allocating 1 QP for FI_EP_MSG TX context\n"); /* XXX temp until we can allocate WQ and RQ independently */ filt.uf_type = USD_FTY_UDP; filt.uf_filter.uf_udp.u_port = 0; @@ -97,6 +98,7 @@ usdf_tx_msg_enable(struct usdf_tx *tx) &filt, &tx->tx_qp); if (ret != 0) { + USDF_INFO("QP allocation failed 
(%s)\n", strerror(-ret)); goto fail; } tx->tx_qp->uq_context = tx; @@ -106,6 +108,7 @@ usdf_tx_msg_enable(struct usdf_tx *tx) sizeof(struct usdf_msg_qe)); if (tx->t.msg.tx_wqe_buf == NULL) { ret = -errno; + USDF_INFO("malloc failed (%s)\n", strerror(-ret)); goto fail; } @@ -151,6 +154,7 @@ usdf_rx_msg_enable(struct usdf_rx *rx) return -FI_ENOCQ; } + USDF_INFO("allocating 1 QP for FI_EP_MSG RX context\n"); /* XXX temp until we can allocate WQ and RQ independently */ filt.uf_type = USD_FTY_UDP; filt.uf_filter.uf_udp.u_port = 0; @@ -164,6 +168,7 @@ usdf_rx_msg_enable(struct usdf_rx *rx) &filt, &rx->rx_qp); if (ret != 0) { + USDF_INFO("QP allocation failed (%s)\n", strerror(-ret)); goto fail; } rx->rx_qp->uq_context = rx; @@ -175,6 +180,7 @@ usdf_rx_msg_enable(struct usdf_rx *rx) qp->uq_rq.urq_num_entries * mtu, (void **)&rx->r.msg.rx_bufs); if (ret != 0) { + USDF_INFO("usd_alloc_mr failed (%s)\n", strerror(-ret)); goto fail; } @@ -190,6 +196,7 @@ usdf_rx_msg_enable(struct usdf_rx *rx) sizeof(struct usdf_msg_qe)); if (rx->r.msg.rx_rqe_buf == NULL) { ret = -errno; + USDF_INFO("malloc failed (%s)\n", strerror(-ret)); goto fail; } @@ -681,7 +688,7 @@ usdf_ep_msg_open(struct fid_domain *domain, struct fi_info *info, ep->ep_domain = udp; ep->ep_caps = info->caps; ep->ep_mode = info->mode; - ep->e.msg.ep_connreq = info->connreq; + ep->e.msg.ep_connreq = (struct usdf_connreq *)info->connreq; ep->e.msg.ep_seq_credits = USDF_RUDP_SEQ_CREDITS; TAILQ_INIT(&ep->e.msg.ep_posted_wqe); diff --git a/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_fabric.c b/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_fabric.c index bf7c1ba148..c6e7702450 100644 --- a/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_fabric.c +++ b/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_fabric.c @@ -201,7 +201,7 @@ usdf_fill_info_dgram( size_t entries; int ret; - fi = fi_allocinfo_internal(); + fi = fi_allocinfo(); if (fi == NULL) { ret = -FI_ENOMEM; goto fail; @@ -228,7 
+228,7 @@ usdf_fill_info_dgram( fi->mode = USDF_DGRAM_SUPP_MODE; addr_format = FI_FORMAT_UNSPEC; } - fi->ep_type = FI_EP_DGRAM; + fi->ep_attr->type = FI_EP_DGRAM; ret = usdf_fill_addr_info(fi, addr_format, src, dest, dap); if (ret != 0) { @@ -365,7 +365,7 @@ usdf_fill_info_msg( uint32_t addr_format; int ret; - fi = fi_allocinfo_internal(); + fi = fi_allocinfo(); if (fi == NULL) { ret = -FI_ENOMEM; goto fail; @@ -392,7 +392,7 @@ usdf_fill_info_msg( fi->mode = USDF_MSG_SUPP_MODE; addr_format = FI_FORMAT_UNSPEC; } - fi->ep_type = FI_EP_MSG; + fi->ep_attr->type = FI_EP_MSG; ret = usdf_fill_addr_info(fi, addr_format, src, dest, dap); @@ -470,7 +470,7 @@ usdf_fill_info_rdm( uint32_t addr_format; int ret; - fi = fi_allocinfo_internal(); + fi = fi_allocinfo(); if (fi == NULL) { ret = -FI_ENOMEM; goto fail; @@ -496,7 +496,7 @@ usdf_fill_info_rdm( fi->mode = USDF_RDM_SUPP_MODE; addr_format = FI_FORMAT_UNSPEC; } - fi->ep_type = FI_EP_RDM; + fi->ep_attr->type = FI_EP_RDM; ret = usdf_fill_addr_info(fi, addr_format, src, dest, dap); if (ret != 0) { @@ -712,7 +712,8 @@ usdf_getinfo(uint32_t version, const char *node, const char *service, continue; } - ep_type = hints->ep_type; + ep_type = hints->ep_attr ? 
hints->ep_attr->type : + FI_EP_UNSPEC; } else { ep_type = FI_EP_UNSPEC; } diff --git a/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_mem.c b/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_mem.c index 924d1db48d..4c5cda07f7 100644 --- a/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_mem.c +++ b/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_mem.c @@ -80,18 +80,26 @@ static struct fi_ops usdf_mr_ops = { }; int -usdf_reg_mr(struct fid_domain *domain, const void *buf, size_t len, +usdf_reg_mr(struct fid *fid, const void *buf, size_t len, uint64_t access, uint64_t offset, uint64_t requested_key, uint64_t flags, struct fid_mr **mr_o, void *context) { struct usdf_mr *mr; struct usdf_domain *udp; int ret; + struct fid_domain *domain; if (flags != 0) { return -FI_EBADFLAGS; } + if (fid->fclass != FI_CLASS_DOMAIN) { + USDF_DEBUG("memory registration only supported " + "for struct fid_domain\n"); + return -FI_EINVAL; + } + domain = container_of(fid, struct fid_domain, fid); + mr = calloc(1, sizeof *mr); if (mr == NULL) { return -FI_ENOMEM; diff --git a/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_pep.c b/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_pep.c index 24632fedff..a65ffadabc 100644 --- a/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_pep.c +++ b/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_pep.c @@ -115,14 +115,14 @@ usdf_pep_conn_info(struct usdf_connreq *crp) /* no domains yet, make an info suitable for creating one */ } else { - ip = fi_allocinfo_internal(); + ip = fi_allocinfo(); if (ip == NULL) { return NULL; } ip->caps = USDF_MSG_CAPS; ip->mode = USDF_MSG_SUPP_MODE; - ip->ep_type = FI_EP_MSG; + ip->ep_attr->type = FI_EP_MSG; ip->addr_format = FI_SOCKADDR_IN; ip->src_addrlen = sizeof(struct sockaddr_in); @@ -157,7 +157,7 @@ usdf_pep_conn_info(struct usdf_connreq *crp) sin->sin_port = reqp->creq_port; ip->dest_addr = sin; - ip->connreq = crp; + ip->connreq = (fi_connreq_t)crp; return 
ip; fail: fi_freeinfo(ip); @@ -451,7 +451,7 @@ usdf_pep_open(struct fid_fabric *fabric, struct fi_info *info, int ret; int optval; - if (info->ep_type != FI_EP_MSG) { + if (info->ep_attr->type != FI_EP_MSG) { return -FI_ENODEV; } diff --git a/opal/mca/common/libfabric/libfabric/prov/verbs/src/fi_verbs.c b/opal/mca/common/libfabric/libfabric/prov/verbs/src/fi_verbs.c index 3dadf03d49..47a2b2b901 100644 --- a/opal/mca/common/libfabric/libfabric/prov/verbs/src/fi_verbs.c +++ b/opal/mca/common/libfabric/libfabric/prov/verbs/src/fi_verbs.c @@ -72,10 +72,11 @@ #define VERBS_IB_PREFIX "IB-0x" #define VERBS_IWARP_FABRIC "Ethernet-iWARP" #define VERBS_ANY_FABRIC "Any RDMA fabric" +#define VERBS_CM_DATA_SIZE 56 #define VERBS_CAPS (FI_MSG | FI_RMA | FI_ATOMICS | FI_READ | FI_WRITE | \ FI_SEND | FI_RECV | FI_REMOTE_READ | FI_REMOTE_WRITE | \ - FI_REMOTE_CQ_DATA | FI_REMOTE_SIGNAL) + FI_REMOTE_SIGNAL) #define VERBS_MODE (FI_LOCAL_MR | FI_PROV_MR_ATTR) #define VERBS_MSG_ORDER (FI_ORDER_RAR | FI_ORDER_RAW | FI_ORDER_RAS | \ FI_ORDER_WAW | FI_ORDER_WAS | FI_ORDER_SAW | FI_ORDER_SAS ) @@ -153,9 +154,9 @@ const struct fi_domain_attr verbs_domain_attr = { }; const struct fi_ep_attr verbs_ep_attr = { + .type = FI_EP_MSG, .protocol_version = 1, .max_msg_size = VERBS_MSG_SIZE, - .total_buffered_recv = 0, .msg_prefix_size = 0, .max_order_raw_size = VERBS_MSG_SIZE, .max_order_war_size = 0, @@ -205,11 +206,15 @@ static int fi_ibv_check_fabric_attr(struct fi_fabric_attr *attr) { if (attr->name && !(!strcmp(attr->name, VERBS_ANY_FABRIC) || !strncmp(attr->name, VERBS_IB_PREFIX, strlen(VERBS_IB_PREFIX)) || - !strcmp(attr->name, VERBS_IWARP_FABRIC))) + !strcmp(attr->name, VERBS_IWARP_FABRIC))) { + VERBS_INFO("Unknown fabric name\n"); return -FI_ENODATA; + } - if (attr->prov_version > VERBS_PROV_VERS) + if (attr->prov_version > VERBS_PROV_VERS) { + VERBS_INFO("Unsupported provider version\n"); return -FI_ENODATA; + } return 0; } @@ -248,17 +253,30 @@ static int fi_ibv_check_domain_attr(struct 
fi_domain_attr *attr) return -FI_ENODATA; } - if (attr->mr_key_size > sizeof_field(struct ibv_sge, lkey)) + if (attr->mr_key_size > sizeof_field(struct ibv_sge, lkey)) { + VERBS_INFO("MR key size too large\n"); return -FI_ENODATA; + } - if (attr->cq_data_size > sizeof_field(struct ibv_send_wr, imm_data)) + if (attr->cq_data_size > sizeof_field(struct ibv_send_wr, imm_data)) { + VERBS_INFO("CQ data size too large\n"); return -FI_ENODATA; + } return 0; } static int fi_ibv_check_ep_attr(struct fi_ep_attr *attr) { + switch (attr->type) { + case FI_EP_UNSPEC: + case FI_EP_MSG: + break; + default: + VERBS_INFO("Unsupported endpoint type\n"); + return -FI_ENODATA; + } + switch (attr->protocol) { case FI_PROTO_UNSPEC: case FI_PROTO_RDMA_CM_IB_RC: @@ -266,17 +284,17 @@ static int fi_ibv_check_ep_attr(struct fi_ep_attr *attr) case FI_PROTO_IB_UD: break; default: + VERBS_INFO("Unsupported protocol\n"); return -FI_ENODATA; } - if (attr->protocol_version > 1) + if (attr->protocol_version > 1) { + VERBS_INFO("Unsupported protocol version\n"); return -FI_ENODATA; + } - if (attr->max_msg_size > verbs_ep_attr.max_msg_size) - return -FI_ENODATA; - - if (attr->total_buffered_recv) { - VERBS_INFO("Buffered Recv not supported\n"); + if (attr->max_msg_size > verbs_ep_attr.max_msg_size) { + VERBS_INFO("Max message size too large\n"); return -FI_ENODATA; } @@ -313,14 +331,15 @@ static int fi_ibv_check_ep_attr(struct fi_ep_attr *attr) return 0; } -static int fi_ibv_check_rx_attr(struct fi_rx_attr *attr) +static int fi_ibv_check_rx_attr(struct fi_rx_attr *attr, struct fi_info *info) { if (attr->caps & ~(verbs_rx_attr.caps)) { VERBS_INFO("Given rx_attr->caps not supported\n"); return -FI_ENODATA; } - if ((attr->mode & verbs_rx_attr.mode) != verbs_rx_attr.mode) { + if (((attr->mode ? 
attr->mode : info->mode) & + verbs_rx_attr.mode) != verbs_rx_attr.mode) { VERBS_INFO("Given rx_attr->mode not supported\n"); return -FI_ENODATA; } @@ -338,14 +357,15 @@ static int fi_ibv_check_rx_attr(struct fi_rx_attr *attr) return 0; } -static int fi_ibv_check_tx_attr(struct fi_tx_attr *attr) +static int fi_ibv_check_tx_attr(struct fi_tx_attr *attr, struct fi_info *info) { if (attr->caps & ~(verbs_tx_attr.caps)) { VERBS_INFO("Given tx_attr->caps not supported\n"); return -FI_ENODATA; } - if ((attr->mode & verbs_tx_attr.mode) != verbs_tx_attr.mode) { + if (((attr->mode ? attr->mode : info->mode) & + verbs_tx_attr.mode) != verbs_tx_attr.mode) { VERBS_INFO("Given tx_attr->mode not supported\n"); return -FI_ENODATA; } @@ -367,19 +387,15 @@ static int fi_ibv_check_info(struct fi_info *info) { int ret; - switch (info->ep_type) { - case FI_EP_UNSPEC: - case FI_EP_MSG: - break; - default: + if (info->caps && (info->caps & ~VERBS_CAPS)) { + VERBS_INFO("Unsupported capabilities\n"); return -FI_ENODATA; } - if (info->caps && (info->caps & ~VERBS_CAPS)) - return -FI_ENODATA; - - if ((info->mode & VERBS_MODE) != VERBS_MODE) + if ((info->mode & VERBS_MODE) != VERBS_MODE) { + VERBS_INFO("Required mode bits not set\n"); return -FI_ENODATA; + } if (info->fabric_attr) { ret = fi_ibv_check_fabric_attr(info->fabric_attr); @@ -400,13 +416,13 @@ static int fi_ibv_check_info(struct fi_info *info) } if (info->rx_attr) { - ret = fi_ibv_check_rx_attr(info->rx_attr); + ret = fi_ibv_check_rx_attr(info->rx_attr, info); if (ret) return ret; } if (info->tx_attr) { - ret = fi_ibv_check_tx_attr(info->tx_attr); + ret = fi_ibv_check_tx_attr(info->tx_attr, info); if (ret) return ret; } @@ -467,12 +483,11 @@ static int fi_ibv_fi_to_rai(struct fi_info *fi, uint64_t flags, struct rdma_addr return 0; } -static int fi_ibv_rai_to_fi(struct rdma_addrinfo *rai, struct fi_info *hints, - struct fi_info *fi) +static int fi_ibv_rai_to_fi(struct rdma_addrinfo *rai, struct fi_info *fi) { fi->caps = VERBS_CAPS; 
fi->mode = VERBS_MODE; - fi->ep_type = FI_EP_MSG; + fi->ep_attr->type = FI_EP_MSG; if (rai->ai_src_len) { if (!(fi->src_addr = malloc(rai->ai_src_len))) @@ -570,19 +585,16 @@ static int fi_ibv_fill_info_attr(struct ibv_context *ctx, struct fi_info *hints, fi->ep_attr->protocol_version = 1; fi->ep_attr->max_msg_size = port_attr.max_msg_sz; - // TODO Give a real size once verbs provider supports inject - fi->ep_attr->inject_size = 0; return 0; } static int -fi_ibv_getepinfo(const char *node, const char *service, +fi_ibv_create_ep(const char *node, const char *service, uint64_t flags, struct fi_info *hints, - struct fi_info **info, struct rdma_cm_id **id) + struct rdma_addrinfo **rai, struct rdma_cm_id **id) { - struct rdma_addrinfo rai_hints, *rai; - struct fi_info *fi; + struct rdma_addrinfo rai_hints, *_rai; int ret; if (hints) { @@ -603,20 +615,11 @@ fi_ibv_getepinfo(const char *node, const char *service, } ret = rdma_getaddrinfo((char *) node, (char *) service, - &rai_hints, &rai); + &rai_hints, &_rai); if (ret) return (errno == ENODEV) ? 
-FI_ENODATA : -errno; - if (!(fi = fi_allocinfo_internal())) { - ret = -FI_ENOMEM; - goto err1; - } - - ret = fi_ibv_rai_to_fi(rai, hints, fi); - if (ret) - goto err2; - - ret = rdma_create_ep(id, rai, NULL, NULL); + ret = rdma_create_ep(id, _rai, NULL, NULL); if (ret) { ret = -errno; if (ret == -ENOENT) { @@ -625,23 +628,15 @@ fi_ibv_getepinfo(const char *node, const char *service, "skipping verbs provider.\n"); ret = -FI_ENODATA; } - goto err2; + goto err; } - ret = fi_ibv_fill_info_attr((*id)->verbs, hints, fi); - if (ret) - goto err3; - - *info = fi; - rdma_freeaddrinfo(rai); - return 0; - -err3: - rdma_destroy_ep(*id); -err2: - fi_freeinfo(fi); -err1: - rdma_freeaddrinfo(rai); + if (rai) { + *rai = _rai; + return 0; + } +err: + rdma_freeaddrinfo(_rai); return ret; } @@ -649,14 +644,38 @@ static int fi_ibv_getinfo(uint32_t version, const char *node, const char *servic uint64_t flags, struct fi_info *hints, struct fi_info **info) { struct rdma_cm_id *id; + struct rdma_addrinfo *rai; + struct fi_info *fi; int ret; - ret = fi_ibv_getepinfo(node, service, flags, hints, info, &id); + ret = fi_ibv_create_ep(node, service, flags, hints, &rai, &id); if (ret) return ret; + if (!(fi = fi_allocinfo())) { + ret = -FI_ENOMEM; + goto err; + } + + ret = fi_ibv_rai_to_fi(rai, fi); + if (ret) + goto err; + + ret = fi_ibv_fill_info_attr(id->verbs, hints, fi); + if (ret) + goto err; + + *info = fi; + rdma_destroy_ep(id); + rdma_freeaddrinfo(rai); return 0; +err: + if (fi) + fi_freeinfo(fi); + rdma_destroy_ep(id); + rdma_freeaddrinfo(rai); + return ret; } static int fi_ibv_msg_ep_create_qp(struct fi_ibv_msg_ep *ep) @@ -1716,7 +1735,8 @@ static int fi_ibv_msg_ep_reject(struct fid_pep *pep, fi_connreq_t connreq, const void *param, size_t paramlen) { - return rdma_reject(connreq, param, (uint8_t) paramlen) ? -errno : 0; + return rdma_reject((struct rdma_cm_id *) connreq, param, + (uint8_t) paramlen) ? 
-errno : 0; } static int fi_ibv_msg_ep_shutdown(struct fid_ep *ep, uint64_t flags) @@ -1743,7 +1763,16 @@ fi_ibv_msg_ep_getopt(fid_t fid, int level, int optname, { switch (level) { case FI_OPT_ENDPOINT: - return -FI_ENOPROTOOPT; + switch (optname) { + case FI_OPT_CM_DATA_SIZE: + if (*optlen < sizeof(size_t)) + return -FI_ETOOSMALL; + *((size_t *) optval) = VERBS_CM_DATA_SIZE; + *optlen = sizeof(size_t); + return 0; + default: + return -FI_ENOPROTOOPT; + } default: return -FI_ENOPROTOOPT; } @@ -1831,7 +1860,6 @@ fi_ibv_open_ep(struct fid_domain *domain, struct fi_info *info, { struct fi_ibv_domain *_domain; struct fi_ibv_msg_ep *_ep; - struct fi_info *fi; int ret; _domain = container_of(domain, struct fi_ibv_domain, domain_fid); @@ -1842,15 +1870,13 @@ fi_ibv_open_ep(struct fid_domain *domain, struct fi_info *info, if (!_ep) return -FI_ENOMEM; - fi = NULL; if (!info->connreq) { - ret = fi_ibv_getepinfo(NULL, NULL, 0, info, &fi, &_ep->id); + ret = fi_ibv_create_ep(NULL, NULL, 0, info, NULL, &_ep->id); if (ret) goto err; - fi_freeinfo(fi); } else { - _ep->id = info->connreq; + _ep->id = (struct rdma_cm_id *) info->connreq; } _ep->id->context = &_ep->ep_fid.fid; @@ -1892,11 +1918,11 @@ fi_ibv_eq_cm_getinfo(struct fi_ibv_fabric *fab, struct rdma_cm_event *event) { struct fi_info *fi; - fi = fi_allocinfo_internal(); + fi = fi_allocinfo(); if (!fi) return NULL; - fi->ep_type = FI_EP_MSG; + fi->ep_attr->type = FI_EP_MSG; fi->caps = VERBS_CAPS; fi->src_addrlen = fi_ibv_sockaddr_len(rdma_get_local_addr(event->id)); @@ -1911,7 +1937,7 @@ fi_ibv_eq_cm_getinfo(struct fi_ibv_fabric *fab, struct rdma_cm_event *event) fi_ibv_fill_info_attr(event->id->verbs, NULL, fi); - fi->connreq = event->id; + fi->connreq = (fi_connreq_t) event->id; return fi; err: fi_freeinfo(fi); @@ -2053,7 +2079,7 @@ static int fi_ibv_eq_control(fid_t fid, int command, void *arg) ret = -FI_ENODATA; break; } - *(void **) arg = &eq->channel->fd; + *(int *) arg = eq->channel->fd; break; default: ret = 
-FI_ENOSYS; @@ -2216,6 +2242,41 @@ fi_ibv_cq_sread(struct fid_cq *cq, void *buf, size_t count, const void *cond, return cur ? cur : ret; } +static uint64_t fi_ibv_comp_flags(struct ibv_wc *wc) +{ + uint64_t flags = 0; + + if (wc->wc_flags & IBV_WC_WITH_IMM) + flags |= FI_REMOTE_CQ_DATA; + + switch (wc->opcode) { + case IBV_WC_SEND: + flags |= FI_SEND | FI_MSG; + break; + case IBV_WC_RDMA_WRITE: + flags |= FI_RMA | FI_WRITE; + break; + case IBV_WC_RDMA_READ: + flags |= FI_RMA | FI_READ; + break; + case IBV_WC_COMP_SWAP: + flags |= FI_ATOMIC; + break; + case IBV_WC_FETCH_ADD: + flags |= FI_ATOMIC; + break; + case IBV_WC_RECV: + flags |= FI_RECV | FI_MSG; + break; + case IBV_WC_RECV_RDMA_WITH_IMM: + flags |= FI_RMA | FI_REMOTE_WRITE; + break; + default: + break; + } + return flags; +} + static ssize_t fi_ibv_cq_read_context(struct fid_cq *cq, void *buf, size_t count) { struct fi_ibv_cq *_cq; @@ -2264,7 +2325,7 @@ static ssize_t fi_ibv_cq_read_msg(struct fid_cq *cq, void *buf, size_t count) } entry->op_context = (void *) (uintptr_t) _cq->wc.wr_id; - entry->flags = (uint64_t) _cq->wc.wc_flags; + entry->flags = fi_ibv_comp_flags(&_cq->wc); entry->len = (uint64_t) _cq->wc.byte_len; entry += 1; } @@ -2293,11 +2354,10 @@ static ssize_t fi_ibv_cq_read_data(struct fid_cq *cq, void *buf, size_t count) } entry->op_context = (void *) (uintptr_t) _cq->wc.wr_id; + entry->flags = fi_ibv_comp_flags(&_cq->wc); if (_cq->wc.wc_flags & IBV_WC_WITH_IMM) { - entry->flags = FI_REMOTE_CQ_DATA; entry->data = _cq->wc.imm_data; } else { - entry->flags = 0; entry->data = 0; } if (_cq->wc.opcode & (IBV_WC_RECV | IBV_WC_RECV_RDMA_WITH_IMM)) @@ -2365,7 +2425,7 @@ static int fi_ibv_cq_control(fid_t fid, int command, void *arg) ret = -FI_ENODATA; break; } - *(void **) arg = &cq->channel->fd; + *(int *) arg = cq->channel->fd; break; default: ret = -FI_ENOSYS; @@ -2508,16 +2568,22 @@ static struct fi_ops fi_ibv_mr_ops = { }; static int -fi_ibv_mr_reg(struct fid_domain *domain, const void *buf, size_t 
len, +fi_ibv_mr_reg(struct fid *fid, const void *buf, size_t len, uint64_t access, uint64_t offset, uint64_t requested_key, uint64_t flags, struct fid_mr **mr, void *context) { struct fi_ibv_mem_desc *md; int fi_ibv_access; + struct fid_domain *domain; if (flags) return -FI_EBADFLAGS; + if (fid->fclass != FI_CLASS_DOMAIN) { + return -FI_EINVAL; + } + domain = container_of(fid, struct fid_domain, fid); + md = calloc(1, sizeof *md); if (!md) return -FI_ENOMEM; @@ -2718,19 +2784,16 @@ fi_ibv_passive_ep(struct fid_fabric *fabric, struct fi_info *info, struct fid_pep **pep, void *context) { struct fi_ibv_pep *_pep; - struct fi_info *fi; int ret; _pep = calloc(1, sizeof *_pep); if (!_pep) return -FI_ENOMEM; - fi = NULL; - ret = fi_ibv_getepinfo(NULL, NULL, FI_SOURCE, info, &fi, &_pep->id); + ret = fi_ibv_create_ep(NULL, NULL, FI_SOURCE, info, NULL, &_pep->id); if (ret) goto err; - fi_freeinfo(fi); _pep->id->context = &_pep->pep_fid.fid; _pep->pep_fid.fid.fclass = FI_CLASS_PEP; diff --git a/opal/mca/common/libfabric/libfabric/src/common.c b/opal/mca/common/libfabric/libfabric/src/common.c index d910dc1c29..c5a8ba5414 100644 --- a/opal/mca/common/libfabric/libfabric/src/common.c +++ b/opal/mca/common/libfabric/libfabric/src/common.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include "fi.h" @@ -98,29 +99,6 @@ int fi_poll_fd(int fd, int timeout) return ret == -1 ? 
-errno : ret; } -struct fi_info *fi_allocinfo_internal(void) -{ - struct fi_info *info; - - info = calloc(1, sizeof(*info)); - if (!info) - return NULL; - - info->tx_attr = calloc(1, sizeof(*info->tx_attr)); - info->rx_attr = calloc(1, sizeof(*info->rx_attr)); - info->ep_attr = calloc(1, sizeof(*info->ep_attr)); - info->domain_attr = calloc(1, sizeof(*info->domain_attr)); - info->fabric_attr = calloc(1, sizeof(*info->fabric_attr)); - if (!info->tx_attr|| !info->rx_attr || !info->ep_attr || - !info->domain_attr || !info->fabric_attr) - goto err; - - return info; -err: - fi_freeinfo(info); - return NULL; -} - uint64_t fi_tag_bits(uint64_t mem_tag_format) { return UINT64_MAX >> (ffsll(htonll(mem_tag_format)) -1); @@ -216,3 +194,11 @@ int fi_rma_target_allowed(uint64_t caps) return 0; } + +uint64_t fi_gettime_ms(void) +{ + struct timeval now; + + gettimeofday(&now, NULL); + return (uint64_t) now.tv_sec * 1000 + now.tv_usec / 1000; +} diff --git a/opal/mca/common/libfabric/libfabric/src/enosys.c b/opal/mca/common/libfabric/libfabric/src/enosys.c index 5822d8a028..eb9ebb21df 100644 --- a/opal/mca/common/libfabric/libfabric/src/enosys.c +++ b/opal/mca/common/libfabric/libfabric/src/enosys.c @@ -248,20 +248,20 @@ int fi_no_srx_context(struct fid_domain *domain, struct fi_rx_attr *attr, /* * struct fi_ops_mr */ -int fi_no_mr_reg(struct fid_domain *domain, const void *buf, size_t len, +int fi_no_mr_reg(struct fid *fid, const void *buf, size_t len, uint64_t access, uint64_t offset, uint64_t requested_key, uint64_t flags, struct fid_mr **mr, void *context) { return -FI_ENOSYS; } -int fi_no_mr_regv(struct fid_domain *domain, const struct iovec *iov, +int fi_no_mr_regv(struct fid *fid, const struct iovec *iov, size_t count, uint64_t access, uint64_t offset, uint64_t requested_key, uint64_t flags, struct fid_mr **mr, void *context) { return -FI_ENOSYS; } -int fi_no_mr_regattr(struct fid_domain *domain, const struct fi_mr_attr *attr, +int fi_no_mr_regattr(struct fid *fid, const struct 
fi_mr_attr *attr, uint64_t flags, struct fid_mr **mr) { return -FI_ENOSYS; diff --git a/opal/mca/common/libfabric/libfabric/src/fabric.c b/opal/mca/common/libfabric/libfabric/src/fabric.c index e6ea50d342..a88899fcc4 100644 --- a/opal/mca/common/libfabric/libfabric/src/fabric.c +++ b/opal/mca/common/libfabric/libfabric/src/fabric.c @@ -297,7 +297,7 @@ int DEFAULT_SYMVER_PRE(fi_getinfo)(uint32_t version, const char *node, const cha if (!prov->provider->getinfo) continue; - if (hints->fabric_attr && hints->fabric_attr->prov_name && + if (hints && hints->fabric_attr && hints->fabric_attr->prov_name && strcmp(prov->provider->name, hints->fabric_attr->prov_name)) continue; @@ -332,11 +332,38 @@ int DEFAULT_SYMVER_PRE(fi_getinfo)(uint32_t version, const char *node, const cha } DEFAULT_SYMVER(fi_getinfo_, fi_getinfo); +static struct fi_info *fi_allocinfo_internal(void) +{ + struct fi_info *info; + + info = calloc(1, sizeof(*info)); + if (!info) + return NULL; + + info->tx_attr = calloc(1, sizeof(*info->tx_attr)); + info->rx_attr = calloc(1, sizeof(*info->rx_attr)); + info->ep_attr = calloc(1, sizeof(*info->ep_attr)); + info->domain_attr = calloc(1, sizeof(*info->domain_attr)); + info->fabric_attr = calloc(1, sizeof(*info->fabric_attr)); + if (!info->tx_attr|| !info->rx_attr || !info->ep_attr || + !info->domain_attr || !info->fabric_attr) + goto err; + + return info; +err: + fi_freeinfo(info); + return NULL; +} + + __attribute__((visibility ("default"))) struct fi_info *DEFAULT_SYMVER_PRE(fi_dupinfo)(const struct fi_info *info) { struct fi_info *dup; + if (!info) + return fi_allocinfo_internal(); + dup = malloc(sizeof(*dup)); if (dup == NULL) { return NULL; diff --git a/opal/mca/common/libfabric/libfabric/src/fi_tostr.c b/opal/mca/common/libfabric/libfabric/src/fi_tostr.c index 8ecaf46692..720c2d667e 100644 --- a/opal/mca/common/libfabric/libfabric/src/fi_tostr.c +++ b/opal/mca/common/libfabric/libfabric/src/fi_tostr.c @@ -179,7 +179,6 @@ static void fi_tostr_caps(char 
*buf, uint64_t caps) IFFLAGSTR(caps, FI_TAGGED); IFFLAGSTR(caps, FI_ATOMICS); IFFLAGSTR(caps, FI_DYNAMIC_MR); - IFFLAGSTR(caps, FI_BUFFERED_RECV); fi_tostr_flags(buf, caps); fi_remove_comma(buf); @@ -326,12 +325,13 @@ static void fi_tostr_ep_attr(char *buf, const struct fi_ep_attr *attr, const cha } strcatf(buf, "%sfi_ep_attr:\n", prefix); + strcatf(buf, "%s%sep_type: ", prefix, TAB); + fi_tostr_ep_type(buf, attr->type); + strcatf(buf, "\n"); strcatf(buf, "%s%sprotocol: ", prefix, TAB); fi_tostr_protocol(buf, attr->protocol); strcatf(buf, "\n"); strcatf(buf, "%s%smax_msg_size: %zd\n", prefix, TAB, attr->max_msg_size); - strcatf(buf, "%s%sinject_size: %zd\n", prefix, TAB, attr->inject_size); - strcatf(buf, "%s%stotal_buffered_recv: %zd\n", prefix, TAB, attr->total_buffered_recv); strcatf(buf, "%s%smax_order_raw_size: %zd\n", prefix, TAB, attr->max_order_raw_size); strcatf(buf, "%s%smax_order_war_size: %zd\n", prefix, TAB, attr->max_order_war_size); strcatf(buf, "%s%smax_order_waw_size: %zd\n", prefix, TAB, attr->max_order_waw_size); @@ -401,9 +401,6 @@ static void fi_tostr_info(char *buf, const struct fi_info *info) fi_tostr_mode(buf, info->mode); strcatf(buf, " ]\n"); - strcatf(buf, "%sep_type: ", TAB); - fi_tostr_ep_type(buf, info->ep_type); - strcatf(buf, "\n"); strcatf(buf, "%sfi_addr_format: ", TAB); fi_tostr_addr_format(buf, info->addr_format); strcatf(buf, "\n"); @@ -492,6 +489,38 @@ static void fi_tostr_version(char *buf) strcatf(buf, VERSION); } +static void fi_tostr_eq_event(char *buf, int type) +{ + switch (type) { + CASEENUMSTR(FI_NOTIFY); + CASEENUMSTR(FI_CONNREQ); + CASEENUMSTR(FI_CONNECTED); + CASEENUMSTR(FI_SHUTDOWN); + CASEENUMSTR(FI_MR_COMPLETE); + CASEENUMSTR(FI_AV_COMPLETE); + default: + strcatf(buf, "Unknown"); + break; + } +} + +static void fi_tostr_cq_event_flags(char *buf, uint64_t flags) +{ + IFFLAGSTR(flags, FI_SEND); + IFFLAGSTR(flags, FI_RECV); + IFFLAGSTR(flags, FI_RMA); + IFFLAGSTR(flags, FI_ATOMIC); + IFFLAGSTR(flags, FI_MSG); + IFFLAGSTR(flags, 
FI_TAGGED); + IFFLAGSTR(flags, FI_READ); + IFFLAGSTR(flags, FI_WRITE); + IFFLAGSTR(flags, FI_REMOTE_READ); + IFFLAGSTR(flags, FI_REMOTE_WRITE); + IFFLAGSTR(flags, FI_REMOTE_CQ_DATA); + IFFLAGSTR(flags, FI_MULTI_RECV); + fi_remove_comma(buf); +} + __attribute__((visibility ("default"))) char *DEFAULT_SYMVER_PRE(fi_tostr)(const void *data, enum fi_type datatype) { @@ -572,6 +601,12 @@ char *DEFAULT_SYMVER_PRE(fi_tostr)(const void *data, enum fi_type datatype) case FI_TYPE_VERSION: fi_tostr_version(buf); break; + case FI_TYPE_EQ_EVENT: + fi_tostr_eq_event(buf, enumval); + break; + case FI_TYPE_CQ_EVENT_FLAGS: + fi_tostr_cq_event_flags(buf, val64); + break; default: strcatf(buf, "Unknown type"); break;