From 9d5eeecb8a5d437ea875933048f4506a41c065b3 Mon Sep 17 00:00:00 2001
From: Nathan Hjelm <hjelmn@lanl.gov>
Date: Mon, 28 Mar 2016 10:52:40 -0600
Subject: [PATCH] pml/ob1: detect unreachable errors

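This commit adds code to detect unreachable procs when using the
dynamic add_procs functionality. If no BTL is in use for a proc, bml/r2
clears and releases the proc's BML endpoint, and ob1's send and isend
return OMPI_ERR_UNREACH when the endpoint is NULL.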

Fixes #1501

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
---
 ompi/mca/bml/r2/bml_r2.c           | 2 ++
 ompi/mca/pml/ob1/pml_ob1_isend.c   | 8 ++++++++
 ompi/mca/pml/ob1/pml_ob1_recvreq.h | 2 ++
 3 files changed, 12 insertions(+)

diff --git a/ompi/mca/bml/r2/bml_r2.c b/ompi/mca/bml/r2/bml_r2.c
index 18fefe5254..e29928f673 100644
--- a/ompi/mca/bml/r2/bml_r2.c
+++ b/ompi/mca/bml/r2/bml_r2.c
@@ -410,6 +410,8 @@ static int mca_bml_r2_add_proc (struct ompi_proc_t *proc)
     }
 
     if (!btl_in_use) {
+        proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL;
+        OBJ_RELEASE(bml_endpoint);
         /* no btl is available for this proc */
         if (mca_bml_r2.show_unreach_errors) {
             opal_show_help ("help-mca-bml-r2.txt", "unreachable proc", true,
diff --git a/ompi/mca/pml/ob1/pml_ob1_isend.c b/ompi/mca/pml/ob1/pml_ob1_isend.c
index 893e6cebec..bd5fd767e3 100644
--- a/ompi/mca/pml/ob1/pml_ob1_isend.c
+++ b/ompi/mca/pml/ob1/pml_ob1_isend.c
@@ -140,6 +140,10 @@ int mca_pml_ob1_isend(const void *buf,
     int16_t seqn;
     int rc;
 
+    if (OPAL_UNLIKELY(NULL == endpoint)) {
+        return OMPI_ERR_UNREACH;
+    }
+
     seqn = (uint16_t) OPAL_THREAD_ADD32(&ob1_proc->send_sequence, 1);
 
     if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) {
@@ -189,6 +193,10 @@ int mca_pml_ob1_send(const void *buf,
     int16_t seqn;
     int rc;
 
+    if (OPAL_UNLIKELY(NULL == endpoint)) {
+        return OMPI_ERR_UNREACH;
+    }
+
     if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_BUFFERED == sendmode)) {
         /* large buffered sends *need* a real request so use isend instead */
         ompi_request_t *brequest;
diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.h b/ompi/mca/pml/ob1/pml_ob1_recvreq.h
index 71fb8c3d5b..77300a8320 100644
--- a/ompi/mca/pml/ob1/pml_ob1_recvreq.h
+++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.h
@@ -435,6 +435,8 @@ static inline int mca_pml_ob1_recv_request_ack_send(ompi_proc_t* proc,
     mca_bml_base_btl_t* bml_btl;
     mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (proc);
 
+    assert (NULL != endpoint);
+
     for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) {
         bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
         if(mca_pml_ob1_recv_request_ack_send_btl(proc, bml_btl, hdr_src_req,