diff --git a/opal/mca/common/libfabric/libfabric/README b/opal/mca/common/libfabric/libfabric/README index a9d1b13e1b..22b768c427 100644 --- a/opal/mca/common/libfabric/libfabric/README +++ b/opal/mca/common/libfabric/libfabric/README @@ -1,7 +1,7 @@ This README is for userspace RDMA fabric library. Version Libfabric v1.0.0rc3 -Released on 2015-03-10 +Released on 2015-03-12 Building ======== diff --git a/opal/mca/common/libfabric/libfabric/man/man3/fi_av.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_av.3 index ec56fe8fc8..3aec8bd9e4 100644 --- a/opal/mca/common/libfabric/libfabric/man/man3/fi_av.3 +++ b/opal/mca/common/libfabric/libfabric/man/man3/fi_av.3 @@ -1,4 +1,4 @@ -.TH fi_av 3 "2015\-03\-09" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" +.TH fi_av 3 "2015\-03\-11" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP fi_av - Address vector operations @@ -201,6 +201,15 @@ operation, even if all addresses fail. The context field in all completions will be the context specified to the insert call, and the data field in the final completion entry will report the number of addresses successfully inserted. +If an error occurs during the asynchronous insertion, an error +completion entry is returned (see \f[C]fi_eq\f[](3) for a discussion of +the fi_eq_err_entry error completion struct). +The context field of the error completion will be the context that was +specified in the insert call; the data field will contain the index of +the failed address. +There will be one error completion returned for each address that fails +to insert into the AV. +.PP : \ : If an AV is opened with FI_EVENT, any insertions attempted before an EQ is bound to the AV will fail with -FI_ENOEQ. 
: \ : Error completions for failed insertions will contain the index of diff --git a/opal/mca/common/libfabric/libfabric/man/man3/fi_eq.3 b/opal/mca/common/libfabric/libfabric/man/man3/fi_eq.3 index 7ea998e170..67bdf632d6 100644 --- a/opal/mca/common/libfabric/libfabric/man/man3/fi_eq.3 +++ b/opal/mca/common/libfabric/libfabric/man/man3/fi_eq.3 @@ -1,4 +1,4 @@ -.TH fi_eq 3 "2015\-02\-23" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" +.TH fi_eq 3 "2015\-03\-11" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@" .SH NAME .PP fi_eq - Event queue operations @@ -227,7 +227,7 @@ The format of this structure is: struct\ fi_eq_entry\ { \ \ \ \ fid_t\ \ \ \ \ \ \ \ \ \ \ \ fid;\ \ \ \ \ \ \ \ /*\ fid\ associated\ with\ request\ */ \ \ \ \ void\ \ \ \ \ \ \ \ \ \ \ \ *context;\ \ \ \ /*\ operation\ context\ */ -\ \ \ \ uint32_t\ \ \ \ \ \ \ \ \ data;\ \ \ \ \ \ \ /*\ completion\ dependent\ data\ */ +\ \ \ \ uint64_t\ \ \ \ \ \ \ \ \ data;\ \ \ \ \ \ \ /*\ completion-specific\ data\ */ }; \f[] .fi @@ -241,6 +241,9 @@ fid_av. The context field will be set to the context specified as part of the operation, if available, otherwise the context will be associated with the fabric descriptor. +The data field will be set as described in the man page for the +corresponding object type (e.g., see \f[C]fi_av\f[](3) for a description +of how asynchronous address vector insertions are completed). .PP \f[I]Connection Notification\f[] : Connection notifications are connection management notifications used to setup or teardown @@ -340,6 +343,7 @@ The format of this structure is defined below. 
struct\ fi_eq_err_entry\ { \ \ \ \ fid_t\ \ \ \ \ \ \ \ \ \ \ \ fid;\ \ \ \ \ \ \ \ /*\ fid\ associated\ with\ error\ */ \ \ \ \ void\ \ \ \ \ \ \ \ \ \ \ \ *context;\ \ \ \ /*\ operation\ context\ */ +\ \ \ \ uint64_t\ \ \ \ \ \ \ \ \ data;\ \ \ \ \ \ \ /*\ completion-specific\ data\ */ \ \ \ \ uint32_t\ \ \ \ \ \ \ \ \ index;\ \ \ \ \ \ /*\ index\ for\ vector\ ops\ */ \ \ \ \ int\ \ \ \ \ \ \ \ \ \ \ \ \ \ err;\ \ \ \ \ \ \ \ /*\ positive\ error\ code\ */ \ \ \ \ int\ \ \ \ \ \ \ \ \ \ \ \ \ \ prov_errno;\ /*\ provider\ error\ code\ */ @@ -355,6 +359,10 @@ will reference a fid_av, and CM events will refer to a fid_ep. The context field will be set to the context specified as part of the operation. .PP +The data field will be set as described in the man page for the +corresponding object type (e.g., see \f[C]fi_av\f[](3) for a description +of how asynchronous address vector insertions are completed). +.PP The general reason for the error is provided through the err field. Provider or operational specific error information may also be available through the prov_errno and err_data fields. 
diff --git a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_init.c b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_init.c index 51e173358f..ab2f5929fc 100644 --- a/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_init.c +++ b/opal/mca/common/libfabric/libfabric/prov/psm/src/psmx_init.c @@ -122,12 +122,13 @@ static int psmx_getinfo(uint32_t version, const char *node, const char *service, if (hints->ep_attr) { switch (hints->ep_attr->type) { case FI_EP_UNSPEC: + case FI_EP_DGRAM: case FI_EP_RDM: break; default: - PSMX_DEBUG("hints->ep_attr->type=%d, supported=%d,%d.\n", + PSMX_DEBUG("hints->ep_attr->type=%d, supported=%d,%d,%d.\n", hints->ep_attr->type, FI_EP_UNSPEC, - FI_EP_RDM); + FI_EP_DGRAM, FI_EP_RDM); goto err_out; } diff --git a/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_av.c b/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_av.c index 7a8b4e7626..8a577f1f38 100644 --- a/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_av.c +++ b/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_av.c @@ -164,7 +164,7 @@ usdf_av_insert_progress(void *v) ret = usnic_arp_lookup(dap->uda_ifname, req->avr_daddr_be, fp->fab_arp_sockfd, eth); - /* anything besides -EAGAIN means request is completed */ + /* anything besides EAGAIN means request is completed */ if (ret != EAGAIN) { TAILQ_REMOVE(&insert->avi_req_list, req, avr_link); req->avr_status = -ret; diff --git a/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_fabric.c b/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_fabric.c index c0f95c3d8b..31a5872131 100644 --- a/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_fabric.c +++ b/opal/mca/common/libfabric/libfabric/prov/usnic/src/usdf_fabric.c @@ -780,15 +780,23 @@ usdf_fabric_close(fid_t fid) /* Tell progression thread to exit */ fp->fab_exit = 1; - ret = usdf_fabric_wake_thread(fp); - if (ret != 0) { - return ret; + if (fp->fab_thread) { + ret = usdf_fabric_wake_thread(fp); + if (ret != 0) { + return 
ret; + } + pthread_join(fp->fab_thread, &rv); } - pthread_join(fp->fab_thread, &rv); usdf_timer_deinit(fp); - close(fp->fab_eventfd); - close(fp->fab_epollfd); - close(fp->fab_arp_sockfd); + if (fp->fab_epollfd != -1) { + close(fp->fab_epollfd); + } + if (fp->fab_eventfd != -1) { + close(fp->fab_eventfd); + } + if (fp->fab_arp_sockfd != -1) { + close(fp->fab_arp_sockfd); + } free(fp); return 0; @@ -855,6 +863,7 @@ static int usdf_fabric_open(struct fi_fabric_attr *fattrp, struct fid_fabric **fabric, void *context) { + struct fid_fabric *ff; struct usdf_fabric *fp; struct usdf_usnic_info *dp; struct usdf_dev_entry *dep; @@ -928,6 +937,13 @@ usdf_fabric_open(struct fi_fabric_attr *fattrp, struct fid_fabric **fabric, goto fail; } + /* initialize timer subsystem */ + ret = usdf_timer_init(fp); + if (ret != 0) { + USDF_INFO("unable to initialize timer\n"); + goto fail; + } + ret = pthread_create(&fp->fab_thread, NULL, usdf_fabric_progression_thread, fp); if (ret != 0) { @@ -936,13 +952,6 @@ usdf_fabric_open(struct fi_fabric_attr *fattrp, struct fid_fabric **fabric, goto fail; } - /* initialize timer subsystem */ - ret = usdf_timer_init(fp); - if (ret != 0) { - USDF_INFO("unable to initialize timer\n"); - goto fail; - } - /* create and bind socket for ARP resolution */ memset(&sin, 0, sizeof(sin)); sin.sin_family = AF_INET; @@ -967,19 +976,8 @@ usdf_fabric_open(struct fi_fabric_attr *fattrp, struct fid_fabric **fabric, return 0; fail: - if (fp != NULL) { - if (fp->fab_epollfd != -1) { - close(fp->fab_epollfd); - } - if (fp->fab_eventfd != -1) { - close(fp->fab_eventfd); - } - if (fp->fab_arp_sockfd != -1) { - close(fp->fab_arp_sockfd); - } - usdf_timer_deinit(fp); - free(fp); - } + ff = fab_utof(fp); + usdf_fabric_close(&ff->fid); USDF_DEBUG("returning %d (%s)\n", ret, fi_strerror(-ret)); return ret; } diff --git a/opal/mca/common/libfabric/libfabric/prov/usnic/src/usnic_direct/usnic_ip_utils.c 
b/opal/mca/common/libfabric/libfabric/prov/usnic/src/usnic_direct/usnic_ip_utils.c index bc07b08f92..aa8a66eae7 100644 --- a/opal/mca/common/libfabric/libfabric/prov/usnic/src/usnic_direct/usnic_ip_utils.c +++ b/opal/mca/common/libfabric/libfabric/prov/usnic/src/usnic_direct/usnic_ip_utils.c @@ -77,6 +77,8 @@ int usnic_arp_lookup(char *ifname, uint32_t ipaddr, int sockfd, uint8_t *macaddr memcpy(macaddr, req.arp_ha.sa_data, 6); else if (status != -1) /* req.arp_flags & ATF_COM == 0 */ err = EAGAIN; + else if (errno == ENXIO) /* ENXIO means no ARP entry was found */ + err = EAGAIN; else /* status == -1 */ err = errno;