* Wrap all the direct error-code checks of the form (OMPI_ERR_* == ret) with
(OMPI_ERR_* == OPAL_SOS_GET_ERROR_CODE(ret)), since the return value could be an SOS-encoded error. The OPAL_SOS_GET_ERROR_CODE() macro takes an SOS-encoded error and returns the native error code.
* Since OPAL_SUCCESS is preserved by SOS, also change all checks of the form (OPAL_ERROR == ret) to (OPAL_SUCCESS != ret). We thus avoid having to decode 'ret' to get the native error code. This commit was SVN r23162.
Этот коммит содержится в:
родитель
f7f4dd87ab
Коммит
afbe3e99c6
@ -195,6 +195,8 @@
|
|||||||
#include "ompi/attribute/attribute.h"
|
#include "ompi/attribute/attribute.h"
|
||||||
#include "opal/class/opal_bitmap.h"
|
#include "opal/class/opal_bitmap.h"
|
||||||
#include "opal/threads/mutex.h"
|
#include "opal/threads/mutex.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "ompi/constants.h"
|
#include "ompi/constants.h"
|
||||||
#include "ompi/datatype/ompi_datatype.h"
|
#include "ompi/datatype/ompi_datatype.h"
|
||||||
#include "ompi/communicator/communicator.h" /* ompi_communicator_t generated in [COPY|DELETE]_ATTR_CALLBACKS */
|
#include "ompi/communicator/communicator.h" /* ompi_communicator_t generated in [COPY|DELETE]_ATTR_CALLBACKS */
|
||||||
@ -1176,7 +1178,7 @@ static int get_value(opal_hash_table_t *attr_hash, int key,
|
|||||||
ret = opal_hash_table_get_value_uint32(keyval_hash, key,
|
ret = opal_hash_table_get_value_uint32(keyval_hash, key,
|
||||||
(void**) &keyval);
|
(void**) &keyval);
|
||||||
|
|
||||||
if (OMPI_ERR_NOT_FOUND == ret) {
|
if (OMPI_ERR_NOT_FOUND == OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
OPAL_THREAD_UNLOCK(&alock);
|
OPAL_THREAD_UNLOCK(&alock);
|
||||||
return MPI_KEYVAL_INVALID;
|
return MPI_KEYVAL_INVALID;
|
||||||
}
|
}
|
||||||
|
@ -31,6 +31,7 @@
|
|||||||
#include "ompi/constants.h"
|
#include "ompi/constants.h"
|
||||||
#include "opal/class/opal_pointer_array.h"
|
#include "opal/class/opal_pointer_array.h"
|
||||||
#include "opal/class/opal_list.h"
|
#include "opal/class/opal_list.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "ompi/mca/pml/pml.h"
|
#include "ompi/mca/pml/pml.h"
|
||||||
#include "ompi/mca/coll/base/base.h"
|
#include "ompi/mca/coll/base/base.h"
|
||||||
#include "ompi/request/request.h"
|
#include "ompi/request/request.h"
|
||||||
@ -143,7 +144,7 @@ int ompi_comm_cid_init (void)
|
|||||||
ompi_comm_world_thread_level_mult = 1;
|
ompi_comm_world_thread_level_mult = 1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else if (OMPI_ERR_NOT_IMPLEMENTED == ret) {
|
} else if (OMPI_ERR_NOT_IMPLEMENTED == OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
if (ompi_mpi_thread_multiple) {
|
if (ompi_mpi_thread_multiple) {
|
||||||
ompi_comm_world_thread_level_mult = 1;
|
ompi_comm_world_thread_level_mult = 1;
|
||||||
}
|
}
|
||||||
|
@ -199,7 +199,7 @@ ompi_errhandler_t *ompi_errhandler_create(ompi_errhandler_type_t object_type,
|
|||||||
|
|
||||||
new_errhandler = OBJ_NEW(ompi_errhandler_t);
|
new_errhandler = OBJ_NEW(ompi_errhandler_t);
|
||||||
if (NULL != new_errhandler) {
|
if (NULL != new_errhandler) {
|
||||||
if (OMPI_ERROR == new_errhandler->eh_f_to_c_index) {
|
if (OMPI_SUCCESS != new_errhandler->eh_f_to_c_index) {
|
||||||
OBJ_RELEASE(new_errhandler);
|
OBJ_RELEASE(new_errhandler);
|
||||||
new_errhandler = NULL;
|
new_errhandler = NULL;
|
||||||
} else {
|
} else {
|
||||||
|
@ -62,7 +62,7 @@ ompi_group_t *ompi_group_allocate(int group_size)
|
|||||||
if (NULL == new_group)
|
if (NULL == new_group)
|
||||||
goto error_exit;
|
goto error_exit;
|
||||||
|
|
||||||
if (OMPI_ERROR == new_group->grp_f_to_c_index) {
|
if (OMPI_SUCCESS != new_group->grp_f_to_c_index) {
|
||||||
OBJ_RELEASE (new_group);
|
OBJ_RELEASE (new_group);
|
||||||
new_group = NULL;
|
new_group = NULL;
|
||||||
goto error_exit;
|
goto error_exit;
|
||||||
@ -106,7 +106,7 @@ ompi_group_t *ompi_group_allocate_sporadic(int group_size)
|
|||||||
if( NULL == new_group) {
|
if( NULL == new_group) {
|
||||||
goto error_exit;
|
goto error_exit;
|
||||||
}
|
}
|
||||||
if (OMPI_ERROR == new_group->grp_f_to_c_index) {
|
if (OMPI_SUCCESS != new_group->grp_f_to_c_index) {
|
||||||
OBJ_RELEASE(new_group);
|
OBJ_RELEASE(new_group);
|
||||||
new_group = NULL;
|
new_group = NULL;
|
||||||
goto error_exit;
|
goto error_exit;
|
||||||
@ -148,7 +148,7 @@ ompi_group_t *ompi_group_allocate_strided(void)
|
|||||||
if( NULL == new_group ) {
|
if( NULL == new_group ) {
|
||||||
goto error_exit;
|
goto error_exit;
|
||||||
}
|
}
|
||||||
if (OMPI_ERROR == new_group->grp_f_to_c_index) {
|
if (OMPI_SUCCESS != new_group->grp_f_to_c_index) {
|
||||||
OBJ_RELEASE(new_group);
|
OBJ_RELEASE(new_group);
|
||||||
new_group = NULL;
|
new_group = NULL;
|
||||||
goto error_exit;
|
goto error_exit;
|
||||||
@ -175,7 +175,7 @@ ompi_group_t *ompi_group_allocate_bmap(int orig_group_size , int group_size)
|
|||||||
if( NULL == new_group) {
|
if( NULL == new_group) {
|
||||||
goto error_exit;
|
goto error_exit;
|
||||||
}
|
}
|
||||||
if (OMPI_ERROR == new_group->grp_f_to_c_index) {
|
if (OMPI_SUCCESS != new_group->grp_f_to_c_index) {
|
||||||
OBJ_RELEASE(new_group);
|
OBJ_RELEASE(new_group);
|
||||||
new_group = NULL;
|
new_group = NULL;
|
||||||
goto error_exit;
|
goto error_exit;
|
||||||
|
@ -31,6 +31,7 @@
|
|||||||
#include "opal/datatype/opal_convertor.h"
|
#include "opal/datatype/opal_convertor.h"
|
||||||
#include "opal/mca/crs/crs.h"
|
#include "opal/mca/crs/crs.h"
|
||||||
#include "opal/mca/crs/base/base.h"
|
#include "opal/mca/crs/base/base.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "ompi/mca/btl/btl.h"
|
#include "ompi/mca/btl/btl.h"
|
||||||
|
|
||||||
@ -272,7 +273,7 @@ static inline int mca_bml_base_send( mca_bml_base_btl_t* bml_btl,
|
|||||||
|
|
||||||
des->des_context = (void*) bml_btl;
|
des->des_context = (void*) bml_btl;
|
||||||
rc = btl->btl_send(btl, bml_btl->btl_endpoint, des, tag);
|
rc = btl->btl_send(btl, bml_btl->btl_endpoint, des, tag);
|
||||||
if(rc == OMPI_ERR_RESOURCE_BUSY)
|
if(OPAL_SOS_GET_ERROR_CODE(rc) == OMPI_ERR_RESOURCE_BUSY)
|
||||||
rc = OMPI_SUCCESS;
|
rc = OMPI_SUCCESS;
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
|
@ -404,7 +404,7 @@ static int mca_bml_r2_add_procs( size_t nprocs,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (mca_bml_r2.show_unreach_errors &&
|
if (mca_bml_r2.show_unreach_errors &&
|
||||||
OMPI_ERR_UNREACH == ret) {
|
OMPI_ERR_UNREACH == OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
orte_show_help("help-mca-bml-r2.txt",
|
orte_show_help("help-mca-bml-r2.txt",
|
||||||
"unreachable proc",
|
"unreachable proc",
|
||||||
true,
|
true,
|
||||||
|
@ -34,6 +34,7 @@
|
|||||||
#include "ompi/mca/btl/btl.h"
|
#include "ompi/mca/btl/btl.h"
|
||||||
#include "opal/sys/timer.h"
|
#include "opal/sys/timer.h"
|
||||||
#include "opal/util/argv.h"
|
#include "opal/util/argv.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/mca/base/mca_base_param.h"
|
#include "opal/mca/base/mca_base_param.h"
|
||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
#include "ompi/mca/btl/base/base.h"
|
#include "ompi/mca/btl/base/base.h"
|
||||||
|
@ -32,6 +32,7 @@
|
|||||||
#include "opal/class/opal_bitmap.h"
|
#include "opal/class/opal_bitmap.h"
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
#include "opal/util/arch.h"
|
#include "opal/util/arch.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "ompi/mca/btl/btl.h"
|
#include "ompi/mca/btl/btl.h"
|
||||||
#include "ompi/mca/btl/base/btl_base_error.h"
|
#include "ompi/mca/btl/base/btl_base_error.h"
|
||||||
@ -291,7 +292,7 @@ static int create_srq(mca_btl_openib_module_t *openib_btl)
|
|||||||
|
|
||||||
/* Check if our device supports modify srq ability */
|
/* Check if our device supports modify srq ability */
|
||||||
rc = check_if_device_support_modify_srq(openib_btl);
|
rc = check_if_device_support_modify_srq(openib_btl);
|
||||||
if(OMPI_ERR_NOT_SUPPORTED == rc) {
|
if(OMPI_ERR_NOT_SUPPORTED == OPAL_SOS_GET_ERROR_CODE(rc)) {
|
||||||
device_support_modify_srq = false;
|
device_support_modify_srq = false;
|
||||||
} else if(OMPI_SUCCESS != rc) {
|
} else if(OMPI_SUCCESS != rc) {
|
||||||
mca_btl_openib_show_init_error(__FILE__, __LINE__,
|
mca_btl_openib_show_init_error(__FILE__, __LINE__,
|
||||||
@ -481,7 +482,8 @@ static int mca_btl_openib_tune_endpoint(mca_btl_openib_module_t* openib_btl,
|
|||||||
ret = ompi_btl_openib_ini_query(endpoint->rem_info.rem_vendor_id,
|
ret = ompi_btl_openib_ini_query(endpoint->rem_info.rem_vendor_id,
|
||||||
endpoint->rem_info.rem_vendor_part_id, &values);
|
endpoint->rem_info.rem_vendor_part_id, &values);
|
||||||
|
|
||||||
if (OMPI_SUCCESS != ret && OMPI_ERR_NOT_FOUND != ret) {
|
if (OMPI_SUCCESS != ret &&
|
||||||
|
OMPI_ERR_NOT_FOUND != OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
orte_show_help("help-mpi-btl-openib.txt",
|
orte_show_help("help-mpi-btl-openib.txt",
|
||||||
"error in device init", true,
|
"error in device init", true,
|
||||||
orte_process_info.nodename,
|
orte_process_info.nodename,
|
||||||
@ -1586,7 +1588,7 @@ int mca_btl_openib_put( mca_btl_base_module_t* btl,
|
|||||||
OPAL_THREAD_LOCK(&ep->endpoint_lock);
|
OPAL_THREAD_LOCK(&ep->endpoint_lock);
|
||||||
rc = check_endpoint_state(ep, descriptor, &ep->pending_put_frags);
|
rc = check_endpoint_state(ep, descriptor, &ep->pending_put_frags);
|
||||||
OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
|
OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
|
||||||
if(OMPI_ERR_RESOURCE_BUSY == rc)
|
if(OMPI_ERR_RESOURCE_BUSY == OPAL_SOS_GET_ERROR_CODE(rc))
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
if(OMPI_SUCCESS != rc)
|
if(OMPI_SUCCESS != rc)
|
||||||
return rc;
|
return rc;
|
||||||
@ -1657,7 +1659,7 @@ int mca_btl_openib_get(mca_btl_base_module_t* btl,
|
|||||||
OPAL_THREAD_LOCK(&ep->endpoint_lock);
|
OPAL_THREAD_LOCK(&ep->endpoint_lock);
|
||||||
rc = check_endpoint_state(ep, descriptor, &ep->pending_get_frags);
|
rc = check_endpoint_state(ep, descriptor, &ep->pending_get_frags);
|
||||||
OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
|
OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
|
||||||
if(OMPI_ERR_RESOURCE_BUSY == rc)
|
if(OMPI_ERR_RESOURCE_BUSY == OPAL_SOS_GET_ERROR_CODE(rc))
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
if(OMPI_SUCCESS != rc)
|
if(OMPI_SUCCESS != rc)
|
||||||
return rc;
|
return rc;
|
||||||
|
@ -1035,8 +1035,8 @@ static int prepare_device_for_use(mca_btl_openib_device_t *device)
|
|||||||
if (OMPI_SUCCESS != rc) {
|
if (OMPI_SUCCESS != rc) {
|
||||||
/* If we're "out of memory", this usually means that we ran
|
/* If we're "out of memory", this usually means that we ran
|
||||||
out of registered memory, so show that error message */
|
out of registered memory, so show that error message */
|
||||||
if (OMPI_ERR_OUT_OF_RESOURCE == rc ||
|
if (OMPI_ERR_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(rc) ||
|
||||||
OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc) {
|
OMPI_ERR_TEMP_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(rc)) {
|
||||||
errno = ENOMEM;
|
errno = ENOMEM;
|
||||||
mca_btl_openib_show_init_error(__FILE__, __LINE__,
|
mca_btl_openib_show_init_error(__FILE__, __LINE__,
|
||||||
"ompi_free_list_init_ex_new",
|
"ompi_free_list_init_ex_new",
|
||||||
@ -1071,8 +1071,8 @@ static int prepare_device_for_use(mca_btl_openib_device_t *device)
|
|||||||
/* If we're "out of memory", this usually means that we
|
/* If we're "out of memory", this usually means that we
|
||||||
ran out of registered memory, so show that error
|
ran out of registered memory, so show that error
|
||||||
message */
|
message */
|
||||||
if (OMPI_ERR_OUT_OF_RESOURCE == rc ||
|
if (OMPI_ERR_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(rc) ||
|
||||||
OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc) {
|
OMPI_ERR_TEMP_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(rc)) {
|
||||||
errno = ENOMEM;
|
errno = ENOMEM;
|
||||||
mca_btl_openib_show_init_error(__FILE__, __LINE__,
|
mca_btl_openib_show_init_error(__FILE__, __LINE__,
|
||||||
"ompi_free_list_init_ex_new",
|
"ompi_free_list_init_ex_new",
|
||||||
@ -1567,11 +1567,12 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
|||||||
ret = ompi_btl_openib_ini_query(device->ib_dev_attr.vendor_id,
|
ret = ompi_btl_openib_ini_query(device->ib_dev_attr.vendor_id,
|
||||||
device->ib_dev_attr.vendor_part_id,
|
device->ib_dev_attr.vendor_part_id,
|
||||||
&values);
|
&values);
|
||||||
if (OMPI_SUCCESS != ret && OMPI_ERR_NOT_FOUND != ret) {
|
if (OMPI_SUCCESS != ret &&
|
||||||
|
OMPI_ERR_NOT_FOUND != OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
/* If we get a serious error, propagate it upwards */
|
/* If we get a serious error, propagate it upwards */
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
if (OMPI_ERR_NOT_FOUND == ret) {
|
if (OMPI_ERR_NOT_FOUND == OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
/* If we didn't find a matching device in the INI files, output a
|
/* If we didn't find a matching device in the INI files, output a
|
||||||
warning that we're using default values (unless overridden
|
warning that we're using default values (unless overridden
|
||||||
that we don't want to see these warnings) */
|
that we don't want to see these warnings) */
|
||||||
@ -1587,7 +1588,8 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
|||||||
/* Note that even if we don't find default values, "values" will
|
/* Note that even if we don't find default values, "values" will
|
||||||
be set indicating that it does not have good values */
|
be set indicating that it does not have good values */
|
||||||
ret = ompi_btl_openib_ini_query(0, 0, &default_values);
|
ret = ompi_btl_openib_ini_query(0, 0, &default_values);
|
||||||
if (OMPI_SUCCESS != ret && OMPI_ERR_NOT_FOUND != ret) {
|
if (OMPI_SUCCESS != ret &&
|
||||||
|
OMPI_ERR_NOT_FOUND != OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
/* If we get a serious error, propagate it upwards */
|
/* If we get a serious error, propagate it upwards */
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
@ -1807,7 +1809,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
|||||||
if (OMPI_SUCCESS != ret) {
|
if (OMPI_SUCCESS != ret) {
|
||||||
/* Out of bounds error indicates that we hit max btl number
|
/* Out of bounds error indicates that we hit max btl number
|
||||||
* don't propagate the error to the caller */
|
* don't propagate the error to the caller */
|
||||||
if (OMPI_ERR_VALUE_OUT_OF_BOUNDS == ret) {
|
if (OMPI_ERR_VALUE_OUT_OF_BOUNDS == OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
ret = OMPI_SUCCESS;
|
ret = OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -2722,7 +2724,7 @@ btl_openib_component_init(int *num_btl_modules,
|
|||||||
/* If we get NOT_SUPPORTED, then no CPC was found for this
|
/* If we get NOT_SUPPORTED, then no CPC was found for this
|
||||||
port. But that's not a fatal error -- just keep going;
|
port. But that's not a fatal error -- just keep going;
|
||||||
let's see if we find any usable openib modules or not. */
|
let's see if we find any usable openib modules or not. */
|
||||||
if (OMPI_ERR_NOT_SUPPORTED == ret) {
|
if (OMPI_ERR_NOT_SUPPORTED == OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
continue;
|
continue;
|
||||||
} else if (OMPI_SUCCESS != ret) {
|
} else if (OMPI_SUCCESS != ret) {
|
||||||
/* All others *are* fatal. Note that we already did a
|
/* All others *are* fatal. Note that we already did a
|
||||||
@ -2878,7 +2880,7 @@ static int progress_no_credits_pending_frags(mca_btl_base_endpoint_t *ep)
|
|||||||
error upward. */
|
error upward. */
|
||||||
rc = mca_btl_openib_endpoint_post_send(ep, to_send_frag(frag));
|
rc = mca_btl_openib_endpoint_post_send(ep, to_send_frag(frag));
|
||||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc &&
|
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc &&
|
||||||
OMPI_ERR_RESOURCE_BUSY != rc)) {
|
OMPI_ERR_RESOURCE_BUSY != OPAL_SOS_GET_ERROR_CODE(rc))) {
|
||||||
OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
|
OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
@ -2904,8 +2906,8 @@ void mca_btl_openib_frag_progress_pending_put_get(mca_btl_base_endpoint_t *ep,
|
|||||||
OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
|
OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
|
||||||
if(NULL == frag)
|
if(NULL == frag)
|
||||||
break;
|
break;
|
||||||
if(mca_btl_openib_get((mca_btl_base_module_t *)openib_btl, ep,
|
if(OPAL_SOS_GET_ERROR_CODE(mca_btl_openib_get((mca_btl_base_module_t *)openib_btl, ep,
|
||||||
&to_base_frag(frag)->base) == OMPI_ERR_OUT_OF_RESOURCE)
|
&to_base_frag(frag)->base)) == OMPI_ERR_OUT_OF_RESOURCE)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2916,8 +2918,8 @@ void mca_btl_openib_frag_progress_pending_put_get(mca_btl_base_endpoint_t *ep,
|
|||||||
OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
|
OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
|
||||||
if(NULL == frag)
|
if(NULL == frag)
|
||||||
break;
|
break;
|
||||||
if(mca_btl_openib_put((mca_btl_base_module_t*)openib_btl, ep,
|
if(OPAL_SOS_GET_ERROR_CODE(mca_btl_openib_put((mca_btl_base_module_t*)openib_btl, ep,
|
||||||
&to_base_frag(frag)->base) == OMPI_ERR_OUT_OF_RESOURCE)
|
&to_base_frag(frag)->base)) == OMPI_ERR_OUT_OF_RESOURCE)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -3293,14 +3295,13 @@ error:
|
|||||||
cq_name[cq], btl_openib_component_status_to_string(wc->status),
|
cq_name[cq], btl_openib_component_status_to_string(wc->status),
|
||||||
wc->status, wc->wr_id,
|
wc->status, wc->wr_id,
|
||||||
wc->opcode, wc->vendor_err, qp));
|
wc->opcode, wc->vendor_err, qp));
|
||||||
orte_notifier.peer(ORTE_NOTIFIER_INFRA, ORTE_ERR_COMM_FAILURE,
|
orte_notifier.log_peer(ORTE_NOTIFIER_CRIT, ORTE_ERR_COMM_FAILURE,
|
||||||
remote_proc ? &remote_proc->proc_name : NULL,
|
remote_proc ? &remote_proc->proc_name : NULL,
|
||||||
"\n\tIB polling %s with status %s "
|
"\n\tIB polling %s with status %s "
|
||||||
"status number %d for wr_id %" PRIx64 " opcode %d vendor error %d qp_idx %d",
|
"status number %d for wr_id %llu opcode %d vendor error %d qp_idx %d",
|
||||||
cq_name[cq],
|
cq_name[cq], btl_openib_component_status_to_string(wc->status),
|
||||||
btl_openib_component_status_to_string(wc->status),
|
wc->status, wc->wr_id,
|
||||||
wc->status, wc->wr_id,
|
wc->opcode, wc->vendor_err, qp);
|
||||||
wc->opcode, wc->vendor_err, qp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (IBV_WC_RNR_RETRY_EXC_ERR == wc->status ||
|
if (IBV_WC_RNR_RETRY_EXC_ERR == wc->status ||
|
||||||
@ -3319,23 +3320,23 @@ error:
|
|||||||
"srq rnr retry exceeded", true,
|
"srq rnr retry exceeded", true,
|
||||||
orte_process_info.nodename, device_name,
|
orte_process_info.nodename, device_name,
|
||||||
peer_hostname);
|
peer_hostname);
|
||||||
orte_notifier.help(ORTE_NOTIFIER_INFRA, ORTE_ERR_COMM_FAILURE,
|
orte_notifier.show_help(ORTE_NOTIFIER_CRIT, ORTE_ERR_COMM_FAILURE,
|
||||||
"help-mpi-btl-openib.txt",
|
"help-mpi-btl-openib.txt",
|
||||||
BTL_OPENIB_QP_TYPE_PP(qp) ?
|
BTL_OPENIB_QP_TYPE_PP(qp) ?
|
||||||
"pp rnr retry exceeded" :
|
"pp rnr retry exceeded" :
|
||||||
"srq rnr retry exceeded",
|
"srq rnr retry exceeded",
|
||||||
orte_process_info.nodename, device_name,
|
orte_process_info.nodename, device_name,
|
||||||
peer_hostname);
|
peer_hostname);
|
||||||
} else if (IBV_WC_RETRY_EXC_ERR == wc->status) {
|
} else if (IBV_WC_RETRY_EXC_ERR == wc->status) {
|
||||||
orte_show_help("help-mpi-btl-openib.txt",
|
orte_show_help("help-mpi-btl-openib.txt",
|
||||||
"pp retry exceeded", true,
|
"pp retry exceeded", true,
|
||||||
orte_process_info.nodename,
|
orte_process_info.nodename,
|
||||||
device_name, peer_hostname);
|
device_name, peer_hostname);
|
||||||
orte_notifier.help(ORTE_NOTIFIER_INFRA, ORTE_ERR_COMM_FAILURE,
|
orte_notifier.show_help(ORTE_NOTIFIER_CRIT, ORTE_ERR_COMM_FAILURE,
|
||||||
"help-mpi-btl-openib.txt",
|
"help-mpi-btl-openib.txt",
|
||||||
"pp retry exceeded",
|
"pp retry exceeded",
|
||||||
orte_process_info.nodename,
|
orte_process_info.nodename,
|
||||||
device_name, peer_hostname);
|
device_name, peer_hostname);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -33,6 +33,7 @@
|
|||||||
|
|
||||||
#include "opal_stdint.h"
|
#include "opal_stdint.h"
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "orte/util/show_help.h"
|
#include "orte/util/show_help.h"
|
||||||
|
|
||||||
@ -678,7 +679,7 @@ void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint)
|
|||||||
frag = to_send_frag(frag_item);
|
frag = to_send_frag(frag_item);
|
||||||
/* We need to post this one */
|
/* We need to post this one */
|
||||||
|
|
||||||
if(OMPI_ERROR == mca_btl_openib_endpoint_post_send(endpoint, frag))
|
if(OMPI_SUCCESS != mca_btl_openib_endpoint_post_send(endpoint, frag))
|
||||||
BTL_ERROR(("Error posting send"));
|
BTL_ERROR(("Error posting send"));
|
||||||
}
|
}
|
||||||
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
|
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
|
||||||
@ -706,7 +707,7 @@ int mca_btl_openib_endpoint_send(mca_btl_base_endpoint_t* ep,
|
|||||||
rc = mca_btl_openib_endpoint_post_send(ep, frag);
|
rc = mca_btl_openib_endpoint_post_send(ep, frag);
|
||||||
}
|
}
|
||||||
OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
|
OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
|
||||||
if (OPAL_UNLIKELY(OMPI_ERR_RESOURCE_BUSY == rc)) {
|
if (OPAL_UNLIKELY(OMPI_ERR_RESOURCE_BUSY == OPAL_SOS_GET_ERROR_CODE(rc))) {
|
||||||
rc = OMPI_SUCCESS;
|
rc = OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -890,7 +891,7 @@ static int mca_btl_openib_endpoint_send_eager_rdma(
|
|||||||
));
|
));
|
||||||
}
|
}
|
||||||
rc = mca_btl_openib_endpoint_send(endpoint, frag);
|
rc = mca_btl_openib_endpoint_send(endpoint, frag);
|
||||||
if (OMPI_SUCCESS == rc ||OMPI_ERR_RESOURCE_BUSY == rc)
|
if (OMPI_SUCCESS == rc || OMPI_ERR_RESOURCE_BUSY == OPAL_SOS_GET_ERROR_CODE(rc))
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
|
|
||||||
MCA_BTL_IB_FRAG_RETURN(frag);
|
MCA_BTL_IB_FRAG_RETURN(frag);
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
#include "orte/util/show_help.h"
|
#include "orte/util/show_help.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/mca/base/mca_base_param.h"
|
#include "opal/mca/base/mca_base_param.h"
|
||||||
|
|
||||||
#include "btl_openib.h"
|
#include "btl_openib.h"
|
||||||
@ -122,13 +123,13 @@ int ompi_btl_openib_ini_init(void)
|
|||||||
/* Note that NOT_FOUND and SUCCESS are not fatal errors
|
/* Note that NOT_FOUND and SUCCESS are not fatal errors
|
||||||
and we keep going. Other errors are treated as
|
and we keep going. Other errors are treated as
|
||||||
fatal */
|
fatal */
|
||||||
if (OMPI_ERR_NOT_FOUND != ret && OMPI_SUCCESS != ret) {
|
if (OMPI_ERR_NOT_FOUND != OPAL_SOS_GET_ERROR_CODE(ret) && OMPI_SUCCESS != ret) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
str = colon + 1;
|
str = colon + 1;
|
||||||
}
|
}
|
||||||
/* Parse the last file if we didn't have a fatal error above */
|
/* Parse the last file if we didn't have a fatal error above */
|
||||||
if (OMPI_ERR_NOT_FOUND != ret && OMPI_SUCCESS != ret) {
|
if (OMPI_ERR_NOT_FOUND != OPAL_SOS_GET_ERROR_CODE(ret) && OMPI_SUCCESS != ret) {
|
||||||
ret = parse_file(str);
|
ret = parse_file(str);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -139,7 +140,7 @@ int ompi_btl_openib_ini_init(void)
|
|||||||
/* Return SUCCESS unless we got a fatal error */
|
/* Return SUCCESS unless we got a fatal error */
|
||||||
|
|
||||||
initialized = true;
|
initialized = true;
|
||||||
return (OMPI_SUCCESS == ret || OMPI_ERR_NOT_FOUND == ret) ?
|
return (OMPI_SUCCESS == ret || OMPI_ERR_NOT_FOUND == OPAL_SOS_GET_ERROR_CODE(ret)) ?
|
||||||
OMPI_SUCCESS : ret;
|
OMPI_SUCCESS : ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#include "orte/util/show_help.h"
|
#include "orte/util/show_help.h"
|
||||||
#include "opal/util/argv.h"
|
#include "opal/util/argv.h"
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Array of all possible connection functions
|
* Array of all possible connection functions
|
||||||
@ -218,7 +219,7 @@ int ompi_btl_openib_connect_base_init(void)
|
|||||||
opal_output(-1, "found available cpc (SUCCESS init): %s",
|
opal_output(-1, "found available cpc (SUCCESS init): %s",
|
||||||
all[i]->cbc_name);
|
all[i]->cbc_name);
|
||||||
continue;
|
continue;
|
||||||
} else if (OMPI_ERR_NOT_SUPPORTED == rc) {
|
} else if (OMPI_ERR_NOT_SUPPORTED == OPAL_SOS_GET_ERROR_CODE(rc)) {
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
return rc;
|
return rc;
|
||||||
@ -264,7 +265,8 @@ int ompi_btl_openib_connect_base_select_for_local_port(mca_btl_openib_module_t *
|
|||||||
strcat(msg, available[i]->cbc_name);
|
strcat(msg, available[i]->cbc_name);
|
||||||
|
|
||||||
rc = available[i]->cbc_query(btl, &cpcs[cpc_index]);
|
rc = available[i]->cbc_query(btl, &cpcs[cpc_index]);
|
||||||
if (OMPI_ERR_NOT_SUPPORTED == rc || OMPI_ERR_UNREACH == rc) {
|
if (OMPI_ERR_NOT_SUPPORTED == OPAL_SOS_GET_ERROR_CODE(rc) ||
|
||||||
|
OMPI_ERR_UNREACH == OPAL_SOS_GET_ERROR_CODE(rc)) {
|
||||||
continue;
|
continue;
|
||||||
} else if (OMPI_SUCCESS != rc) {
|
} else if (OMPI_SUCCESS != rc) {
|
||||||
free(cpcs);
|
free(cpcs);
|
||||||
|
@ -857,7 +857,7 @@ static int ibcm_component_query(mca_btl_openib_module_t *btl,
|
|||||||
|
|
||||||
error:
|
error:
|
||||||
ibcm_module_finalize(btl, (ompi_btl_openib_connect_base_module_t *) m);
|
ibcm_module_finalize(btl, (ompi_btl_openib_connect_base_module_t *) m);
|
||||||
if (OMPI_ERR_NOT_SUPPORTED == rc) {
|
if (OMPI_ERR_NOT_SUPPORTED == OPAL_SOS_GET_ERROR_CODE(rc)) {
|
||||||
BTL_VERBOSE(("unavailable for use on %s:%d; skipped",
|
BTL_VERBOSE(("unavailable for use on %s:%d; skipped",
|
||||||
ibv_get_device_name(btl->device->ib_dev),
|
ibv_get_device_name(btl->device->ib_dev),
|
||||||
btl->port_num));
|
btl->port_num));
|
||||||
|
@ -29,6 +29,7 @@
|
|||||||
#include "orte/util/show_help.h"
|
#include "orte/util/show_help.h"
|
||||||
#include "opal/util/error.h"
|
#include "opal/util/error.h"
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "orte/mca/rml/rml.h"
|
#include "orte/mca/rml/rml.h"
|
||||||
#include "orte/mca/rml/rml_types.h"
|
#include "orte/mca/rml/rml_types.h"
|
||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
|
@ -32,6 +32,7 @@
|
|||||||
|
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
#include "opal/util/error.h"
|
#include "opal/util/error.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "orte/util/show_help.h"
|
#include "orte/util/show_help.h"
|
||||||
|
|
||||||
#include "btl_openib_fd.h"
|
#include "btl_openib_fd.h"
|
||||||
@ -1912,7 +1913,7 @@ out3:
|
|||||||
out1:
|
out1:
|
||||||
free(*cpc);
|
free(*cpc);
|
||||||
out:
|
out:
|
||||||
if (OMPI_ERR_NOT_SUPPORTED == rc) {
|
if (OMPI_ERR_NOT_SUPPORTED == OPAL_SOS_GET_ERROR_CODE(rc)) {
|
||||||
opal_output_verbose(5, mca_btl_base_output,
|
opal_output_verbose(5, mca_btl_base_output,
|
||||||
"openib BTL: rdmacm CPC unavailable for use on %s:%d; skipped",
|
"openib BTL: rdmacm CPC unavailable for use on %s:%d; skipped",
|
||||||
ibv_get_device_name(openib_btl->device->ib_dev),
|
ibv_get_device_name(openib_btl->device->ib_dev),
|
||||||
|
@ -67,7 +67,7 @@ OBJ_CLASS_DECLARATION(mca_btl_portals_frag_recv_t);
|
|||||||
ompi_free_list_item_t *item; \
|
ompi_free_list_item_t *item; \
|
||||||
OMPI_FREE_LIST_GET(&((mca_btl_portals_module_t*)btl_macro)->portals_frag_eager, item, rc); \
|
OMPI_FREE_LIST_GET(&((mca_btl_portals_module_t*)btl_macro)->portals_frag_eager, item, rc); \
|
||||||
frag = (mca_btl_portals_frag_t*) item; \
|
frag = (mca_btl_portals_frag_t*) item; \
|
||||||
if (rc == OMPI_ERR_TEMP_OUT_OF_RESOURCE) { \
|
if (OPAL_SOS_GET_ERROR_CODE(rc) == OMPI_ERR_TEMP_OUT_OF_RESOURCE) { \
|
||||||
OMPI_BTL_PORTALS_FRAG_ALLOC_MAX(btl_macro, frag, rc); \
|
OMPI_BTL_PORTALS_FRAG_ALLOC_MAX(btl_macro, frag, rc); \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
@ -959,7 +959,8 @@ mca_btl_base_module_t** mca_btl_tcp_component_init(int *num_btl_modules,
|
|||||||
}
|
}
|
||||||
#if OPAL_WANT_IPV6
|
#if OPAL_WANT_IPV6
|
||||||
if((ret = mca_btl_tcp_component_create_listen(AF_INET6)) != OMPI_SUCCESS) {
|
if((ret = mca_btl_tcp_component_create_listen(AF_INET6)) != OMPI_SUCCESS) {
|
||||||
if (!(OMPI_ERR_IN_ERRNO == ret && EAFNOSUPPORT == opal_socket_errno)) {
|
if (!(OMPI_ERR_IN_ERRNO == OPAL_SOS_GET_ERROR_CODE(ret) &&
|
||||||
|
EAFNOSUPPORT == opal_socket_errno)) {
|
||||||
opal_output (0, "mca_btl_tcp_component: IPv6 listening socket failed\n");
|
opal_output (0, "mca_btl_tcp_component: IPv6 listening socket failed\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -1174,7 +1174,7 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
|
|||||||
/* get process affinity mask */
|
/* get process affinity mask */
|
||||||
OPAL_PAFFINITY_CPU_ZERO(my_cpu_set);
|
OPAL_PAFFINITY_CPU_ZERO(my_cpu_set);
|
||||||
ret=opal_paffinity_base_get(&my_cpu_set);
|
ret=opal_paffinity_base_get(&my_cpu_set);
|
||||||
if( OPAL_ERR_NOT_FOUND == ret ) {
|
if( OPAL_ERR_NOT_FOUND == OPAL_SOS_GET_ERROR_CODE(ret) ) {
|
||||||
|
|
||||||
/* pa affinity not set, so socket index will be set to -1 */
|
/* pa affinity not set, so socket index will be set to -1 */
|
||||||
my_socket_index=-1;
|
my_socket_index=-1;
|
||||||
|
@ -48,6 +48,7 @@
|
|||||||
#include "opal/util/path.h"
|
#include "opal/util/path.h"
|
||||||
#include "opal/align.h"
|
#include "opal/align.h"
|
||||||
#include "opal/threads/mutex.h"
|
#include "opal/threads/mutex.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "orte/mca/rml/rml.h"
|
#include "orte/mca/rml/rml.h"
|
||||||
#include "orte/util/name_fns.h"
|
#include "orte/util/name_fns.h"
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
|
|
||||||
#include "opal/mca/mca.h"
|
#include "opal/mca/mca.h"
|
||||||
#include "opal/mca/base/base.h"
|
#include "opal/mca/base/base.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "opal/mca/base/mca_base_param.h"
|
#include "opal/mca/base/mca_base_param.h"
|
||||||
#include "opal/mca/base/mca_base_component_repository.h"
|
#include "opal/mca/base/mca_base_component_repository.h"
|
||||||
@ -40,7 +41,7 @@ int ompi_dpm_base_select(void)
|
|||||||
(mca_base_module_t **) &best_module,
|
(mca_base_module_t **) &best_module,
|
||||||
(mca_base_component_t **) &best_component))) {
|
(mca_base_component_t **) &best_component))) {
|
||||||
/* it is okay not to find any executable components */
|
/* it is okay not to find any executable components */
|
||||||
if (OMPI_ERR_NOT_FOUND == ret) {
|
if (OMPI_ERR_NOT_FOUND == OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
ret = OPAL_SUCCESS;
|
ret = OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
|
|
||||||
#include "opal/util/argv.h"
|
#include "opal/util/argv.h"
|
||||||
#include "opal/util/opal_getcwd.h"
|
#include "opal/util/opal_getcwd.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "opal/dss/dss.h"
|
#include "opal/dss/dss.h"
|
||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
|
@ -121,7 +121,7 @@ check_config_value_bool(char *key, ompi_info_t *info)
|
|||||||
|
|
||||||
info_not_found:
|
info_not_found:
|
||||||
param = mca_base_param_find("osc", "pt2pt", key);
|
param = mca_base_param_find("osc", "pt2pt", key);
|
||||||
if (param == OPAL_ERROR) return false;
|
if (param != OPAL_SUCCESS) return false;
|
||||||
|
|
||||||
ret = mca_base_param_lookup_int(param, &flag);
|
ret = mca_base_param_lookup_int(param, &flag);
|
||||||
if (OMPI_SUCCESS != ret) return false;
|
if (OMPI_SUCCESS != ret) return false;
|
||||||
|
@ -25,6 +25,7 @@
|
|||||||
#include "mpi.h"
|
#include "mpi.h"
|
||||||
#include "opal/runtime/opal_progress.h"
|
#include "opal/runtime/opal_progress.h"
|
||||||
#include "opal/threads/mutex.h"
|
#include "opal/threads/mutex.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "ompi/communicator/communicator.h"
|
#include "ompi/communicator/communicator.h"
|
||||||
#include "ompi/mca/osc/base/base.h"
|
#include "ompi/mca/osc/base/base.h"
|
||||||
|
|
||||||
@ -122,7 +123,7 @@ ompi_osc_pt2pt_module_fence(int assert, ompi_win_t *win)
|
|||||||
|
|
||||||
ret = ompi_osc_pt2pt_sendreq_send(module, req);
|
ret = ompi_osc_pt2pt_sendreq_send(module, req);
|
||||||
|
|
||||||
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret ) {
|
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(ret) ) {
|
||||||
opal_output_verbose(5, ompi_osc_base_output,
|
opal_output_verbose(5, ompi_osc_base_output,
|
||||||
"complete: failure in starting sendreq (%d). Will try later.",
|
"complete: failure in starting sendreq (%d). Will try later.",
|
||||||
ret);
|
ret);
|
||||||
@ -267,7 +268,7 @@ ompi_osc_pt2pt_module_complete(ompi_win_t *win)
|
|||||||
|
|
||||||
ret = ompi_osc_pt2pt_sendreq_send(module, req);
|
ret = ompi_osc_pt2pt_sendreq_send(module, req);
|
||||||
|
|
||||||
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret ) {
|
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(ret) ) {
|
||||||
opal_output_verbose(5, ompi_osc_base_output,
|
opal_output_verbose(5, ompi_osc_base_output,
|
||||||
"complete: failure in starting sendreq (%d). Will try later.",
|
"complete: failure in starting sendreq (%d). Will try later.",
|
||||||
ret);
|
ret);
|
||||||
@ -480,7 +481,7 @@ ompi_osc_pt2pt_module_unlock(int target,
|
|||||||
|
|
||||||
ret = ompi_osc_pt2pt_sendreq_send(module, req);
|
ret = ompi_osc_pt2pt_sendreq_send(module, req);
|
||||||
|
|
||||||
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret ) {
|
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(ret) ) {
|
||||||
opal_output_verbose(5, ompi_osc_base_output,
|
opal_output_verbose(5, ompi_osc_base_output,
|
||||||
"complete: failure in starting sendreq (%d). Will try later.",
|
"complete: failure in starting sendreq (%d). Will try later.",
|
||||||
ret);
|
ret);
|
||||||
|
@ -134,7 +134,7 @@ ompi_osc_rdma_module_accumulate(void *origin_addr, int origin_count,
|
|||||||
|
|
||||||
ret = ompi_osc_rdma_sendreq_send(module, sendreq);
|
ret = ompi_osc_rdma_sendreq_send(module, sendreq);
|
||||||
|
|
||||||
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret) {
|
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
OPAL_THREAD_LOCK(&module->m_lock);
|
OPAL_THREAD_LOCK(&module->m_lock);
|
||||||
sendreq->req_module->m_num_pending_out -= 1;
|
sendreq->req_module->m_num_pending_out -= 1;
|
||||||
opal_list_append(&(module->m_pending_sendreqs),
|
opal_list_append(&(module->m_pending_sendreqs),
|
||||||
@ -206,7 +206,7 @@ ompi_osc_rdma_module_get(void *origin_addr,
|
|||||||
|
|
||||||
ret = ompi_osc_rdma_sendreq_send(module, sendreq);
|
ret = ompi_osc_rdma_sendreq_send(module, sendreq);
|
||||||
|
|
||||||
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret) {
|
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
OPAL_THREAD_LOCK(&module->m_lock);
|
OPAL_THREAD_LOCK(&module->m_lock);
|
||||||
sendreq->req_module->m_num_pending_out -= 1;
|
sendreq->req_module->m_num_pending_out -= 1;
|
||||||
opal_list_append(&(module->m_pending_sendreqs),
|
opal_list_append(&(module->m_pending_sendreqs),
|
||||||
@ -274,7 +274,7 @@ ompi_osc_rdma_module_put(void *origin_addr, int origin_count,
|
|||||||
|
|
||||||
ret = ompi_osc_rdma_sendreq_send(module, sendreq);
|
ret = ompi_osc_rdma_sendreq_send(module, sendreq);
|
||||||
|
|
||||||
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret) {
|
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
OPAL_THREAD_LOCK(&module->m_lock);
|
OPAL_THREAD_LOCK(&module->m_lock);
|
||||||
sendreq->req_module->m_num_pending_out -= 1;
|
sendreq->req_module->m_num_pending_out -= 1;
|
||||||
opal_list_append(&(module->m_pending_sendreqs),
|
opal_list_append(&(module->m_pending_sendreqs),
|
||||||
|
@ -128,7 +128,7 @@ check_config_value_bool(char *key, ompi_info_t *info)
|
|||||||
|
|
||||||
info_not_found:
|
info_not_found:
|
||||||
param = mca_base_param_find("osc", "rdma", key);
|
param = mca_base_param_find("osc", "rdma", key);
|
||||||
if (param == OPAL_ERROR) return false;
|
if (param != OPAL_SUCCESS) return false;
|
||||||
|
|
||||||
ret = mca_base_param_lookup_int(param, &flag);
|
ret = mca_base_param_lookup_int(param, &flag);
|
||||||
if (OMPI_SUCCESS != ret) return false;
|
if (OMPI_SUCCESS != ret) return false;
|
||||||
|
@ -140,7 +140,7 @@ ompi_osc_rdma_module_fence(int assert, ompi_win_t *win)
|
|||||||
opal_list_remove_first(&(module->m_copy_pending_sendreqs));
|
opal_list_remove_first(&(module->m_copy_pending_sendreqs));
|
||||||
|
|
||||||
ret = ompi_osc_rdma_sendreq_send(module, req);
|
ret = ompi_osc_rdma_sendreq_send(module, req);
|
||||||
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret) {
|
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
opal_list_append(&(module->m_copy_pending_sendreqs), (opal_list_item_t*)req);
|
opal_list_append(&(module->m_copy_pending_sendreqs), (opal_list_item_t*)req);
|
||||||
} else if (OMPI_SUCCESS != ret) {
|
} else if (OMPI_SUCCESS != ret) {
|
||||||
return ret;
|
return ret;
|
||||||
@ -355,7 +355,7 @@ ompi_osc_rdma_module_complete(ompi_win_t *win)
|
|||||||
(ompi_osc_rdma_sendreq_t*) item;
|
(ompi_osc_rdma_sendreq_t*) item;
|
||||||
|
|
||||||
ret = ompi_osc_rdma_sendreq_send(module, req);
|
ret = ompi_osc_rdma_sendreq_send(module, req);
|
||||||
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret) {
|
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
opal_list_append(&(module->m_copy_pending_sendreqs), item);
|
opal_list_append(&(module->m_copy_pending_sendreqs), item);
|
||||||
break;
|
break;
|
||||||
} else if (OMPI_SUCCESS != ret) {
|
} else if (OMPI_SUCCESS != ret) {
|
||||||
@ -579,7 +579,7 @@ ompi_osc_rdma_module_unlock(int target,
|
|||||||
(ompi_osc_rdma_sendreq_t*) item;
|
(ompi_osc_rdma_sendreq_t*) item;
|
||||||
|
|
||||||
ret = ompi_osc_rdma_sendreq_send(module, req);
|
ret = ompi_osc_rdma_sendreq_send(module, req);
|
||||||
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret) {
|
if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
opal_list_append(&(module->m_copy_pending_sendreqs), item);
|
opal_list_append(&(module->m_copy_pending_sendreqs), item);
|
||||||
break;
|
break;
|
||||||
} else if (OMPI_SUCCESS != ret) {
|
} else if (OMPI_SUCCESS != ret) {
|
||||||
|
@ -25,6 +25,7 @@
|
|||||||
|
|
||||||
#include "opal/class/opal_list.h"
|
#include "opal/class/opal_list.h"
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "orte/util/show_help.h"
|
#include "orte/util/show_help.h"
|
||||||
#include "opal/runtime/opal_progress.h"
|
#include "opal/runtime/opal_progress.h"
|
||||||
#include "opal/mca/mca.h"
|
#include "opal/mca/mca.h"
|
||||||
@ -357,7 +358,7 @@ mca_pml_base_pml_check_selected(const char *my_pml,
|
|||||||
(void**) &remote_pml, &size);
|
(void**) &remote_pml, &size);
|
||||||
|
|
||||||
/* if modex isn't implemented, then just assume all is well... */
|
/* if modex isn't implemented, then just assume all is well... */
|
||||||
if (OMPI_ERR_NOT_IMPLEMENTED == ret) {
|
if (OMPI_ERR_NOT_IMPLEMENTED == OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
opal_output_verbose( 10, mca_pml_base_output,
|
opal_output_verbose( 10, mca_pml_base_output,
|
||||||
"check:select: modex not implemented");
|
"check:select: modex not implemented");
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
|
@ -580,7 +580,7 @@ void mca_pml_csum_process_pending_packets(mca_bml_base_btl_t* bml_btl)
|
|||||||
pckt->hdr.hdr_ack.hdr_dst_req.pval,
|
pckt->hdr.hdr_ack.hdr_dst_req.pval,
|
||||||
pckt->hdr.hdr_ack.hdr_send_offset,
|
pckt->hdr.hdr_ack.hdr_send_offset,
|
||||||
pckt->hdr.hdr_common.hdr_flags & MCA_PML_CSUM_HDR_FLAGS_NORDMA);
|
pckt->hdr.hdr_common.hdr_flags & MCA_PML_CSUM_HDR_FLAGS_NORDMA);
|
||||||
if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) {
|
if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(rc)) ) {
|
||||||
OPAL_THREAD_LOCK(&mca_pml_csum.lock);
|
OPAL_THREAD_LOCK(&mca_pml_csum.lock);
|
||||||
opal_list_append(&mca_pml_csum.pckt_pending,
|
opal_list_append(&mca_pml_csum.pckt_pending,
|
||||||
(opal_list_item_t*)pckt);
|
(opal_list_item_t*)pckt);
|
||||||
@ -593,7 +593,7 @@ void mca_pml_csum_process_pending_packets(mca_bml_base_btl_t* bml_btl)
|
|||||||
pckt->hdr.hdr_fin.hdr_des.pval,
|
pckt->hdr.hdr_fin.hdr_des.pval,
|
||||||
pckt->order,
|
pckt->order,
|
||||||
pckt->hdr.hdr_fin.hdr_fail);
|
pckt->hdr.hdr_fin.hdr_fail);
|
||||||
if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) {
|
if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(rc)) ) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -625,7 +625,7 @@ void mca_pml_csum_process_pending_rdma(void)
|
|||||||
} else {
|
} else {
|
||||||
rc = mca_pml_csum_recv_request_get_frag(frag);
|
rc = mca_pml_csum_recv_request_get_frag(frag);
|
||||||
}
|
}
|
||||||
if(OMPI_ERR_OUT_OF_RESOURCE == rc)
|
if(OMPI_ERR_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(rc))
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -14,6 +14,7 @@
|
|||||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||||
* Copyright (c) 2009 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2009 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
|
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -167,7 +168,7 @@ void mca_pml_csum_recv_frag_callback_match(mca_btl_base_module_t* btl,
|
|||||||
if (csum_received != csum) {
|
if (csum_received != csum) {
|
||||||
opal_output(0, "%s:%s:%d: Invalid \'match header\' - received csum:0x%04x != computed csum:0x%04x\n",
|
opal_output(0, "%s:%s:%d: Invalid \'match header\' - received csum:0x%04x != computed csum:0x%04x\n",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
|
||||||
orte_notifier.log(ORTE_NOTIFIER_INFRA, 1,
|
orte_notifier.log(ORTE_NOTIFIER_CRIT, 1,
|
||||||
"Checksum header violation: job %s file %s line %d",
|
"Checksum header violation: job %s file %s line %d",
|
||||||
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
||||||
__FILE__, __LINE__);
|
__FILE__, __LINE__);
|
||||||
@ -298,7 +299,7 @@ void mca_pml_csum_recv_frag_callback_match(mca_btl_base_module_t* btl,
|
|||||||
if (csum_data != hdr->hdr_csum) {
|
if (csum_data != hdr->hdr_csum) {
|
||||||
opal_output(0, "%s:%s:%d: Invalid \'match data\' - received csum:0x%x != computed csum:0x%x\n",
|
opal_output(0, "%s:%s:%d: Invalid \'match data\' - received csum:0x%x != computed csum:0x%x\n",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_csum, csum_data);
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_csum, csum_data);
|
||||||
orte_notifier.log(ORTE_NOTIFIER_INFRA, 1,
|
orte_notifier.log(ORTE_NOTIFIER_CRIT, 1,
|
||||||
"Checksum data violation: job %s file %s line %d",
|
"Checksum data violation: job %s file %s line %d",
|
||||||
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
||||||
__FILE__, __LINE__);
|
__FILE__, __LINE__);
|
||||||
@ -347,7 +348,7 @@ void mca_pml_csum_recv_frag_callback_rndv(mca_btl_base_module_t* btl,
|
|||||||
if (csum_received != csum) {
|
if (csum_received != csum) {
|
||||||
opal_output(0, "%s:%s:%d: Invalid \'rndv header\' - received csum:0x%04x != computed csum:0x%04x\n",
|
opal_output(0, "%s:%s:%d: Invalid \'rndv header\' - received csum:0x%04x != computed csum:0x%04x\n",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
|
||||||
orte_notifier.log(ORTE_NOTIFIER_INFRA, 1,
|
orte_notifier.log(ORTE_NOTIFIER_CRIT, 1,
|
||||||
"Checksum header violation: job %s file %s line %d",
|
"Checksum header violation: job %s file %s line %d",
|
||||||
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
||||||
__FILE__, __LINE__);
|
__FILE__, __LINE__);
|
||||||
@ -405,7 +406,7 @@ void mca_pml_csum_recv_frag_callback_ack(mca_btl_base_module_t* btl,
|
|||||||
if (csum_received != csum) {
|
if (csum_received != csum) {
|
||||||
opal_output(0, "%s:%s:%d: Invalid \'ACK header\' - received csum:0x%04x != computed csum:0x%04x\n",
|
opal_output(0, "%s:%s:%d: Invalid \'ACK header\' - received csum:0x%04x != computed csum:0x%04x\n",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
|
||||||
orte_notifier.log(ORTE_NOTIFIER_INFRA, 1,
|
orte_notifier.log(ORTE_NOTIFIER_CRIT, 1,
|
||||||
"Checksum header violation: job %s file %s line %d",
|
"Checksum header violation: job %s file %s line %d",
|
||||||
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
||||||
__FILE__, __LINE__);
|
__FILE__, __LINE__);
|
||||||
@ -458,7 +459,7 @@ void mca_pml_csum_recv_frag_callback_frag(mca_btl_base_module_t* btl,
|
|||||||
if(csum_received != csum) {
|
if(csum_received != csum) {
|
||||||
opal_output(0, "%s:%s:%d: Invalid \'frag header\' - received csum:0x%04x != computed csum:0x%04x\n",
|
opal_output(0, "%s:%s:%d: Invalid \'frag header\' - received csum:0x%04x != computed csum:0x%04x\n",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
|
||||||
orte_notifier.log(ORTE_NOTIFIER_INFRA, 1,
|
orte_notifier.log(ORTE_NOTIFIER_CRIT, 1,
|
||||||
"Checksum header violation: job %s file %s line %d",
|
"Checksum header violation: job %s file %s line %d",
|
||||||
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
||||||
__FILE__, __LINE__);
|
__FILE__, __LINE__);
|
||||||
@ -500,7 +501,7 @@ void mca_pml_csum_recv_frag_callback_put(mca_btl_base_module_t* btl,
|
|||||||
if(csum_received != csum) {
|
if(csum_received != csum) {
|
||||||
opal_output(0, "%s:%s:%d: Invalid \'PUT header\' - received csum:0x%04x != computed csum:0x%04x\n",
|
opal_output(0, "%s:%s:%d: Invalid \'PUT header\' - received csum:0x%04x != computed csum:0x%04x\n",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
|
||||||
orte_notifier.log(ORTE_NOTIFIER_INFRA, 1,
|
orte_notifier.log(ORTE_NOTIFIER_CRIT, 1,
|
||||||
"Checksum header violation: job %s file %s line %d",
|
"Checksum header violation: job %s file %s line %d",
|
||||||
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
||||||
__FILE__, __LINE__);
|
__FILE__, __LINE__);
|
||||||
@ -542,7 +543,7 @@ void mca_pml_csum_recv_frag_callback_fin(mca_btl_base_module_t* btl,
|
|||||||
if(csum_received != csum) {
|
if(csum_received != csum) {
|
||||||
opal_output(0, "%s:%s:%d: Invalid \'FIN header\' - received csum:0x%04x != computed csum:0x%04x\n",
|
opal_output(0, "%s:%s:%d: Invalid \'FIN header\' - received csum:0x%04x != computed csum:0x%04x\n",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
|
||||||
orte_notifier.log(ORTE_NOTIFIER_INFRA, 1,
|
orte_notifier.log(ORTE_NOTIFIER_CRIT, 1,
|
||||||
"Checksum header violation: job %s file %s line %d",
|
"Checksum header violation: job %s file %s line %d",
|
||||||
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
||||||
__FILE__, __LINE__);
|
__FILE__, __LINE__);
|
||||||
|
@ -13,6 +13,7 @@
|
|||||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||||
* Copyright (c) 2009 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2009 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
|
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -80,7 +81,7 @@ void mca_pml_csum_recv_request_process_pending(void)
|
|||||||
if( OPAL_UNLIKELY(NULL == recvreq) )
|
if( OPAL_UNLIKELY(NULL == recvreq) )
|
||||||
break;
|
break;
|
||||||
recvreq->req_pending = false;
|
recvreq->req_pending = false;
|
||||||
if(mca_pml_csum_recv_request_schedule_exclusive(recvreq, NULL) ==
|
if(OPAL_SOS_GET_ERROR_CODE(mca_pml_csum_recv_request_schedule_exclusive(recvreq, NULL)) ==
|
||||||
OMPI_ERR_OUT_OF_RESOURCE)
|
OMPI_ERR_OUT_OF_RESOURCE)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -423,7 +424,7 @@ int mca_pml_csum_recv_request_get_frag( mca_pml_csum_rdma_frag_t* frag )
|
|||||||
/* queue up get request */
|
/* queue up get request */
|
||||||
rc = mca_bml_base_get(bml_btl,descriptor);
|
rc = mca_bml_base_get(bml_btl,descriptor);
|
||||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
|
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
|
||||||
if(OMPI_ERR_OUT_OF_RESOURCE == rc) {
|
if(OMPI_ERR_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(rc)) {
|
||||||
mca_bml_base_free(bml_btl, descriptor);
|
mca_bml_base_free(bml_btl, descriptor);
|
||||||
OPAL_THREAD_LOCK(&mca_pml_csum.lock);
|
OPAL_THREAD_LOCK(&mca_pml_csum.lock);
|
||||||
opal_list_append(&mca_pml_csum.rdma_pending,
|
opal_list_append(&mca_pml_csum.rdma_pending,
|
||||||
@ -496,7 +497,7 @@ void mca_pml_csum_recv_request_progress_frag( mca_pml_csum_recv_request_t* recvr
|
|||||||
if(csum != hdr->hdr_frag.hdr_csum) {
|
if(csum != hdr->hdr_frag.hdr_csum) {
|
||||||
opal_output(0, "%s:%s:%d: Invalid \'frag data\' - received csum:0x%x != computed csum:0x%x\n",
|
opal_output(0, "%s:%s:%d: Invalid \'frag data\' - received csum:0x%x != computed csum:0x%x\n",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_frag.hdr_csum, csum);
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_frag.hdr_csum, csum);
|
||||||
orte_notifier.log(ORTE_NOTIFIER_INFRA, 1,
|
orte_notifier.log(ORTE_NOTIFIER_CRIT, 1,
|
||||||
"Checksum data violation: job %s file %s line %d",
|
"Checksum data violation: job %s file %s line %d",
|
||||||
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
||||||
__FILE__, __LINE__);
|
__FILE__, __LINE__);
|
||||||
@ -642,7 +643,7 @@ void mca_pml_csum_recv_request_progress_rndv( mca_pml_csum_recv_request_t* recvr
|
|||||||
if (csum != hdr->hdr_match.hdr_csum) {
|
if (csum != hdr->hdr_match.hdr_csum) {
|
||||||
opal_output(0, "%s:%s:%d: Invalid \'rndv data\' - received csum:0x%x != computed csum:0x%x\n",
|
opal_output(0, "%s:%s:%d: Invalid \'rndv data\' - received csum:0x%x != computed csum:0x%x\n",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_match.hdr_csum, csum);
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_match.hdr_csum, csum);
|
||||||
orte_notifier.log(ORTE_NOTIFIER_INFRA, 1,
|
orte_notifier.log(ORTE_NOTIFIER_CRIT, 1,
|
||||||
"Checksum data violation: job %s file %s line %d",
|
"Checksum data violation: job %s file %s line %d",
|
||||||
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
||||||
__FILE__, __LINE__);
|
__FILE__, __LINE__);
|
||||||
@ -704,7 +705,7 @@ void mca_pml_csum_recv_request_progress_match( mca_pml_csum_recv_request_t* recv
|
|||||||
if (csum != hdr->hdr_match.hdr_csum) {
|
if (csum != hdr->hdr_match.hdr_csum) {
|
||||||
opal_output(0, "%s:%s:%d: Invalid \'match data\' - received csum:0x%x != computed csum:0x%x\n",
|
opal_output(0, "%s:%s:%d: Invalid \'match data\' - received csum:0x%x != computed csum:0x%x\n",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_match.hdr_csum, csum);
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_match.hdr_csum, csum);
|
||||||
orte_notifier.log(ORTE_NOTIFIER_INFRA, 1,
|
orte_notifier.log(ORTE_NOTIFIER_CRIT, 1,
|
||||||
"Checksum data violation: job %s file %s line %d",
|
"Checksum data violation: job %s file %s line %d",
|
||||||
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
(NULL == orte_job_ident) ? "UNKNOWN" : orte_job_ident,
|
||||||
__FILE__, __LINE__);
|
__FILE__, __LINE__);
|
||||||
|
@ -346,7 +346,7 @@ static inline int mca_pml_csum_recv_request_schedule_exclusive(
|
|||||||
|
|
||||||
do {
|
do {
|
||||||
rc = mca_pml_csum_recv_request_schedule_once(req, start_bml_btl);
|
rc = mca_pml_csum_recv_request_schedule_once(req, start_bml_btl);
|
||||||
if(rc == OMPI_ERR_OUT_OF_RESOURCE)
|
if(OPAL_SOS_GET_ERROR_CODE(rc) == OMPI_ERR_OUT_OF_RESOURCE)
|
||||||
break;
|
break;
|
||||||
} while(!unlock_recv_request(req));
|
} while(!unlock_recv_request(req));
|
||||||
|
|
||||||
|
@ -64,7 +64,7 @@ void mca_pml_csum_send_request_process_pending(mca_bml_base_btl_t *bml_btl)
|
|||||||
|
|
||||||
switch(pending_type) {
|
switch(pending_type) {
|
||||||
case MCA_PML_CSUM_SEND_PENDING_SCHEDULE:
|
case MCA_PML_CSUM_SEND_PENDING_SCHEDULE:
|
||||||
if(mca_pml_csum_send_request_schedule_exclusive(sendreq) ==
|
if(OPAL_SOS_GET_ERROR_CODE(mca_pml_csum_send_request_schedule_exclusive(sendreq)) ==
|
||||||
OMPI_ERR_OUT_OF_RESOURCE) {
|
OMPI_ERR_OUT_OF_RESOURCE) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -73,7 +73,7 @@ void mca_pml_csum_send_request_process_pending(mca_bml_base_btl_t *bml_btl)
|
|||||||
send_dst = mca_bml_base_btl_array_find(
|
send_dst = mca_bml_base_btl_array_find(
|
||||||
&sendreq->req_endpoint->btl_eager, bml_btl->btl);
|
&sendreq->req_endpoint->btl_eager, bml_btl->btl);
|
||||||
if( (NULL == send_dst) ||
|
if( (NULL == send_dst) ||
|
||||||
(mca_pml_csum_send_request_start_btl(sendreq, send_dst) ==
|
(OPAL_SOS_GET_ERROR_CODE(mca_pml_csum_send_request_start_btl(sendreq, send_dst)) ==
|
||||||
OMPI_ERR_OUT_OF_RESOURCE) ) {
|
OMPI_ERR_OUT_OF_RESOURCE) ) {
|
||||||
/* prepend to the pending list to minimize reordering in case
|
/* prepend to the pending list to minimize reordering in case
|
||||||
* send_dst != 0 */
|
* send_dst != 0 */
|
||||||
@ -586,7 +586,7 @@ int mca_pml_csum_send_request_start_copy( mca_pml_csum_send_request_t* sendreq,
|
|||||||
}
|
}
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
switch(rc) {
|
switch(OPAL_SOS_GET_ERROR_CODE(rc)) {
|
||||||
case OMPI_ERR_RESOURCE_BUSY:
|
case OMPI_ERR_RESOURCE_BUSY:
|
||||||
/* No more resources. Allow the upper level to queue the send */
|
/* No more resources. Allow the upper level to queue the send */
|
||||||
rc = OMPI_ERR_OUT_OF_RESOURCE;
|
rc = OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
@ -1251,7 +1251,7 @@ int mca_pml_csum_send_request_put_frag( mca_pml_csum_rdma_frag_t* frag )
|
|||||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
|
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
|
||||||
mca_bml_base_free(bml_btl, des);
|
mca_bml_base_free(bml_btl, des);
|
||||||
frag->rdma_length = save_size;
|
frag->rdma_length = save_size;
|
||||||
if(OMPI_ERR_OUT_OF_RESOURCE == rc) {
|
if(OMPI_ERR_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(rc)) {
|
||||||
OPAL_THREAD_LOCK(&mca_pml_csum.lock);
|
OPAL_THREAD_LOCK(&mca_pml_csum.lock);
|
||||||
opal_list_append(&mca_pml_csum.rdma_pending, (opal_list_item_t*)frag);
|
opal_list_append(&mca_pml_csum.rdma_pending, (opal_list_item_t*)frag);
|
||||||
OPAL_THREAD_UNLOCK(&mca_pml_csum.lock);
|
OPAL_THREAD_UNLOCK(&mca_pml_csum.lock);
|
||||||
|
@ -284,7 +284,7 @@ mca_pml_csum_send_request_schedule_exclusive(mca_pml_csum_send_request_t* sendre
|
|||||||
int rc;
|
int rc;
|
||||||
do {
|
do {
|
||||||
rc = mca_pml_csum_send_request_schedule_once(sendreq);
|
rc = mca_pml_csum_send_request_schedule_once(sendreq);
|
||||||
if(rc == OMPI_ERR_OUT_OF_RESOURCE)
|
if(OPAL_SOS_GET_ERROR_CODE(rc) == OMPI_ERR_OUT_OF_RESOURCE)
|
||||||
break;
|
break;
|
||||||
} while(!unlock_send_request(sendreq));
|
} while(!unlock_send_request(sendreq));
|
||||||
|
|
||||||
@ -432,7 +432,7 @@ mca_pml_csum_send_request_start( mca_pml_csum_send_request_t* sendreq )
|
|||||||
/* select a btl */
|
/* select a btl */
|
||||||
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
|
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
|
||||||
rc = mca_pml_csum_send_request_start_btl(sendreq, bml_btl);
|
rc = mca_pml_csum_send_request_start_btl(sendreq, bml_btl);
|
||||||
if( OPAL_LIKELY(OMPI_ERR_OUT_OF_RESOURCE != rc) )
|
if( OPAL_LIKELY(OMPI_ERR_OUT_OF_RESOURCE != OPAL_SOS_GET_ERROR_CODE(rc)) )
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
add_request_to_send_pending(sendreq, MCA_PML_CSUM_SEND_PENDING_START, true);
|
add_request_to_send_pending(sendreq, MCA_PML_CSUM_SEND_PENDING_START, true);
|
||||||
|
@ -102,7 +102,7 @@ static void mca_pml_dr_error_completion(
|
|||||||
mca_pml_dr_vfrag_t* vfrag = (mca_pml_dr_vfrag_t*)descriptor->des_cbdata;
|
mca_pml_dr_vfrag_t* vfrag = (mca_pml_dr_vfrag_t*)descriptor->des_cbdata;
|
||||||
mca_pml_dr_send_request_t* sendreq = (mca_pml_dr_send_request_t*)vfrag->vf_send.pval;
|
mca_pml_dr_send_request_t* sendreq = (mca_pml_dr_send_request_t*)vfrag->vf_send.pval;
|
||||||
|
|
||||||
switch(status) {
|
switch(OPAL_SOS_GET_ERROR_CODE(status)) {
|
||||||
case OMPI_ERR_UNREACH:
|
case OMPI_ERR_UNREACH:
|
||||||
case OMPI_ERR_CONNECTION_FAILED:
|
case OMPI_ERR_CONNECTION_FAILED:
|
||||||
case OMPI_ERR_CONNECTION_REFUSED:
|
case OMPI_ERR_CONNECTION_REFUSED:
|
||||||
|
@ -549,7 +549,7 @@ void mca_pml_ob1_process_pending_packets(mca_bml_base_btl_t* bml_btl)
|
|||||||
pckt->hdr.hdr_ack.hdr_dst_req.pval,
|
pckt->hdr.hdr_ack.hdr_dst_req.pval,
|
||||||
pckt->hdr.hdr_ack.hdr_send_offset,
|
pckt->hdr.hdr_ack.hdr_send_offset,
|
||||||
pckt->hdr.hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_NORDMA);
|
pckt->hdr.hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_NORDMA);
|
||||||
if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) {
|
if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(rc)) ) {
|
||||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||||
opal_list_append(&mca_pml_ob1.pckt_pending,
|
opal_list_append(&mca_pml_ob1.pckt_pending,
|
||||||
(opal_list_item_t*)pckt);
|
(opal_list_item_t*)pckt);
|
||||||
@ -562,7 +562,7 @@ void mca_pml_ob1_process_pending_packets(mca_bml_base_btl_t* bml_btl)
|
|||||||
pckt->hdr.hdr_fin.hdr_des,
|
pckt->hdr.hdr_fin.hdr_des,
|
||||||
pckt->order,
|
pckt->order,
|
||||||
pckt->hdr.hdr_fin.hdr_fail);
|
pckt->hdr.hdr_fin.hdr_fail);
|
||||||
if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) {
|
if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(rc)) ) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -594,7 +594,7 @@ void mca_pml_ob1_process_pending_rdma(void)
|
|||||||
} else {
|
} else {
|
||||||
rc = mca_pml_ob1_recv_request_get_frag(frag);
|
rc = mca_pml_ob1_recv_request_get_frag(frag);
|
||||||
}
|
}
|
||||||
if(OMPI_ERR_OUT_OF_RESOURCE == rc)
|
if(OMPI_ERR_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(rc))
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -46,7 +46,7 @@ void mca_pml_ob1_recv_request_process_pending(void)
|
|||||||
if( OPAL_UNLIKELY(NULL == recvreq) )
|
if( OPAL_UNLIKELY(NULL == recvreq) )
|
||||||
break;
|
break;
|
||||||
recvreq->req_pending = false;
|
recvreq->req_pending = false;
|
||||||
if(mca_pml_ob1_recv_request_schedule_exclusive(recvreq, NULL) ==
|
if(OPAL_SOS_GET_ERROR_CODE(mca_pml_ob1_recv_request_schedule_exclusive(recvreq, NULL)) ==
|
||||||
OMPI_ERR_OUT_OF_RESOURCE)
|
OMPI_ERR_OUT_OF_RESOURCE)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -384,7 +384,7 @@ int mca_pml_ob1_recv_request_get_frag( mca_pml_ob1_rdma_frag_t* frag )
|
|||||||
/* queue up get request */
|
/* queue up get request */
|
||||||
rc = mca_bml_base_get(bml_btl,descriptor);
|
rc = mca_bml_base_get(bml_btl,descriptor);
|
||||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
|
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
|
||||||
if(OMPI_ERR_OUT_OF_RESOURCE == rc) {
|
if(OMPI_ERR_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(rc)) {
|
||||||
mca_bml_base_free(bml_btl, descriptor);
|
mca_bml_base_free(bml_btl, descriptor);
|
||||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||||
opal_list_append(&mca_pml_ob1.rdma_pending,
|
opal_list_append(&mca_pml_ob1.rdma_pending,
|
||||||
|
@ -347,7 +347,7 @@ static inline int mca_pml_ob1_recv_request_schedule_exclusive(
|
|||||||
|
|
||||||
do {
|
do {
|
||||||
rc = mca_pml_ob1_recv_request_schedule_once(req, start_bml_btl);
|
rc = mca_pml_ob1_recv_request_schedule_once(req, start_bml_btl);
|
||||||
if(rc == OMPI_ERR_OUT_OF_RESOURCE)
|
if(OPAL_SOS_GET_ERROR_CODE(rc) == OMPI_ERR_OUT_OF_RESOURCE)
|
||||||
break;
|
break;
|
||||||
} while(!unlock_recv_request(req));
|
} while(!unlock_recv_request(req));
|
||||||
|
|
||||||
|
@ -52,7 +52,7 @@ void mca_pml_ob1_send_request_process_pending(mca_bml_base_btl_t *bml_btl)
|
|||||||
|
|
||||||
switch(pending_type) {
|
switch(pending_type) {
|
||||||
case MCA_PML_OB1_SEND_PENDING_SCHEDULE:
|
case MCA_PML_OB1_SEND_PENDING_SCHEDULE:
|
||||||
if(mca_pml_ob1_send_request_schedule_exclusive(sendreq) ==
|
if(OPAL_SOS_GET_ERROR_CODE(mca_pml_ob1_send_request_schedule_exclusive(sendreq)) ==
|
||||||
OMPI_ERR_OUT_OF_RESOURCE) {
|
OMPI_ERR_OUT_OF_RESOURCE) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -61,8 +61,8 @@ void mca_pml_ob1_send_request_process_pending(mca_bml_base_btl_t *bml_btl)
|
|||||||
send_dst = mca_bml_base_btl_array_find(
|
send_dst = mca_bml_base_btl_array_find(
|
||||||
&sendreq->req_endpoint->btl_eager, bml_btl->btl);
|
&sendreq->req_endpoint->btl_eager, bml_btl->btl);
|
||||||
if( (NULL == send_dst) ||
|
if( (NULL == send_dst) ||
|
||||||
(mca_pml_ob1_send_request_start_btl(sendreq, send_dst) ==
|
(OPAL_SOS_GET_ERROR_CODE(mca_pml_ob1_send_request_start_btl(sendreq, send_dst)) ==
|
||||||
OMPI_ERR_OUT_OF_RESOURCE) ) {
|
OMPI_ERR_OUT_OF_RESOURCE) ) {
|
||||||
/* prepend to the pending list to minimize reordering in case
|
/* prepend to the pending list to minimize reordering in case
|
||||||
* send_dst != 0 */
|
* send_dst != 0 */
|
||||||
add_request_to_send_pending(sendreq,
|
add_request_to_send_pending(sendreq,
|
||||||
@ -541,7 +541,7 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq,
|
|||||||
}
|
}
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
switch(rc) {
|
switch(OPAL_SOS_GET_ERROR_CODE(rc)) {
|
||||||
case OMPI_ERR_RESOURCE_BUSY:
|
case OMPI_ERR_RESOURCE_BUSY:
|
||||||
/* No more resources. Allow the upper level to queue the send */
|
/* No more resources. Allow the upper level to queue the send */
|
||||||
rc = OMPI_ERR_OUT_OF_RESOURCE;
|
rc = OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
@ -1173,7 +1173,7 @@ int mca_pml_ob1_send_request_put_frag( mca_pml_ob1_rdma_frag_t* frag )
|
|||||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
|
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
|
||||||
mca_bml_base_free(bml_btl, des);
|
mca_bml_base_free(bml_btl, des);
|
||||||
frag->rdma_length = save_size;
|
frag->rdma_length = save_size;
|
||||||
if(OMPI_ERR_OUT_OF_RESOURCE == rc) {
|
if(OMPI_ERR_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(rc)) {
|
||||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||||
opal_list_append(&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag);
|
opal_list_append(&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag);
|
||||||
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
|
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
|
||||||
|
@ -284,7 +284,7 @@ mca_pml_ob1_send_request_schedule_exclusive(mca_pml_ob1_send_request_t* sendreq)
|
|||||||
int rc;
|
int rc;
|
||||||
do {
|
do {
|
||||||
rc = mca_pml_ob1_send_request_schedule_once(sendreq);
|
rc = mca_pml_ob1_send_request_schedule_once(sendreq);
|
||||||
if(rc == OMPI_ERR_OUT_OF_RESOURCE)
|
if(OPAL_SOS_GET_ERROR_CODE(rc) == OMPI_ERR_OUT_OF_RESOURCE)
|
||||||
break;
|
break;
|
||||||
} while(!unlock_send_request(sendreq));
|
} while(!unlock_send_request(sendreq));
|
||||||
|
|
||||||
@ -432,7 +432,7 @@ mca_pml_ob1_send_request_start( mca_pml_ob1_send_request_t* sendreq )
|
|||||||
/* select a btl */
|
/* select a btl */
|
||||||
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
|
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
|
||||||
rc = mca_pml_ob1_send_request_start_btl(sendreq, bml_btl);
|
rc = mca_pml_ob1_send_request_start_btl(sendreq, bml_btl);
|
||||||
if( OPAL_LIKELY(OMPI_ERR_OUT_OF_RESOURCE != rc) )
|
if( OPAL_LIKELY(OMPI_ERR_OUT_OF_RESOURCE != OPAL_SOS_GET_ERROR_CODE(rc)) )
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
add_request_to_send_pending(sendreq, MCA_PML_OB1_SEND_PENDING_START, true);
|
add_request_to_send_pending(sendreq, MCA_PML_OB1_SEND_PENDING_START, true);
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
|
|
||||||
#include "ompi_config.h"
|
#include "ompi_config.h"
|
||||||
#include "vprotocol_pessimist_eventlog.h"
|
#include "vprotocol_pessimist_eventlog.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "orte/mca/rml/rml.h"
|
#include "orte/mca/rml/rml.h"
|
||||||
#include "orte/mca/rml/base/rml_contact.h"
|
#include "orte/mca/rml/base/rml_contact.h"
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
|
|
||||||
#include "opal/mca/mca.h"
|
#include "opal/mca/mca.h"
|
||||||
#include "opal/mca/base/base.h"
|
#include "opal/mca/base/base.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "opal/mca/base/mca_base_param.h"
|
#include "opal/mca/base/mca_base_param.h"
|
||||||
#include "opal/mca/base/mca_base_component_repository.h"
|
#include "opal/mca/base/mca_base_component_repository.h"
|
||||||
@ -40,7 +41,7 @@ int ompi_pubsub_base_select(void)
|
|||||||
(mca_base_module_t **) &best_module,
|
(mca_base_module_t **) &best_module,
|
||||||
(mca_base_component_t **) &best_component))) {
|
(mca_base_component_t **) &best_component))) {
|
||||||
/* it is okay not to find any executable components */
|
/* it is okay not to find any executable components */
|
||||||
if (OMPI_ERR_NOT_FOUND == ret) {
|
if (OMPI_ERR_NOT_FOUND == OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
ret = OPAL_SUCCESS;
|
ret = OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
|
|
||||||
#include "orte/util/show_help.h"
|
#include "orte/util/show_help.h"
|
||||||
#include "opal/util/argv.h"
|
#include "opal/util/argv.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "opal/dss/dss.h"
|
#include "opal/dss/dss.h"
|
||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
|
@ -121,7 +121,7 @@ int mca_rcache_rb_insert (
|
|||||||
if(flags & MCA_MPOOL_FLAGS_CACHE) {
|
if(flags & MCA_MPOOL_FLAGS_CACHE) {
|
||||||
rc = mca_rcache_rb_mru_insert( (mca_rcache_rb_module_t*) rcache, reg);
|
rc = mca_rcache_rb_mru_insert( (mca_rcache_rb_module_t*) rcache, reg);
|
||||||
if(OMPI_SUCCESS != rc) {
|
if(OMPI_SUCCESS != rc) {
|
||||||
if(OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc) {
|
if(OMPI_ERR_TEMP_OUT_OF_RESOURCE == OPAL_SOS_GET_ERROR_CODE(rc)) {
|
||||||
/*
|
/*
|
||||||
* If the registration is too big for the rcache,
|
* If the registration is too big for the rcache,
|
||||||
* don't cache it and reset the flags so the upper level
|
* don't cache it and reset the flags so the upper level
|
||||||
|
@ -25,6 +25,8 @@
|
|||||||
#include "ompi/info/info.h"
|
#include "ompi/info/info.h"
|
||||||
#include "ompi/mca/pubsub/pubsub.h"
|
#include "ompi/mca/pubsub/pubsub.h"
|
||||||
|
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES
|
#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES
|
||||||
#pragma weak MPI_Unpublish_name = PMPI_Unpublish_name
|
#pragma weak MPI_Unpublish_name = PMPI_Unpublish_name
|
||||||
#endif
|
#endif
|
||||||
@ -66,13 +68,13 @@ int MPI_Unpublish_name(char *service_name, MPI_Info info,
|
|||||||
*/
|
*/
|
||||||
rc = ompi_pubsub.unpublish(service_name, info);
|
rc = ompi_pubsub.unpublish(service_name, info);
|
||||||
if ( OMPI_SUCCESS != rc ) {
|
if ( OMPI_SUCCESS != rc ) {
|
||||||
if (OMPI_ERR_NOT_FOUND == rc) {
|
if (OMPI_ERR_NOT_FOUND == OPAL_SOS_GET_ERROR_CODE(rc)) {
|
||||||
/* service couldn't be found */
|
/* service couldn't be found */
|
||||||
OPAL_CR_EXIT_LIBRARY();
|
OPAL_CR_EXIT_LIBRARY();
|
||||||
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_SERVICE,
|
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_SERVICE,
|
||||||
FUNC_NAME);
|
FUNC_NAME);
|
||||||
}
|
}
|
||||||
if (OMPI_ERR_PERM == rc) {
|
if (OMPI_ERR_PERM == OPAL_SOS_GET_ERROR_CODE(rc)) {
|
||||||
/* this process didn't own the specified service */
|
/* this process didn't own the specified service */
|
||||||
OPAL_CR_EXIT_LIBRARY();
|
OPAL_CR_EXIT_LIBRARY();
|
||||||
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ACCESS,
|
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ACCESS,
|
||||||
|
@ -235,7 +235,7 @@ ompi_op_t *ompi_op_create_user(bool commute,
|
|||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (OMPI_ERROR == new_op->o_f_to_c_index) {
|
if (OMPI_SUCCESS != new_op->o_f_to_c_index) {
|
||||||
OBJ_RELEASE(new_op);
|
OBJ_RELEASE(new_op);
|
||||||
new_op = NULL;
|
new_op = NULL;
|
||||||
goto error;
|
goto error;
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
#include "opal/threads/mutex.h"
|
#include "opal/threads/mutex.h"
|
||||||
#include "opal/dss/dss.h"
|
#include "opal/dss/dss.h"
|
||||||
#include "opal/util/arch.h"
|
#include "opal/util/arch.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
#include "orte/mca/ess/ess.h"
|
#include "orte/mca/ess/ess.h"
|
||||||
@ -168,7 +169,7 @@ int ompi_proc_set_arch(void)
|
|||||||
return OMPI_ERR_NOT_SUPPORTED;
|
return OMPI_ERR_NOT_SUPPORTED;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
} else if (OMPI_ERR_NOT_IMPLEMENTED == ret) {
|
} else if (OMPI_ERR_NOT_IMPLEMENTED == OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
proc->proc_arch = opal_local_arch;
|
proc->proc_arch = opal_local_arch;
|
||||||
} else {
|
} else {
|
||||||
OPAL_THREAD_UNLOCK(&ompi_proc_lock);
|
OPAL_THREAD_UNLOCK(&ompi_proc_lock);
|
||||||
|
@ -57,7 +57,6 @@
|
|||||||
#include "orte/mca/odls/base/base.h"
|
#include "orte/mca/odls/base/base.h"
|
||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
#include "orte/util/name_fns.h"
|
#include "orte/util/name_fns.h"
|
||||||
|
|
||||||
#include "orte/mca/notifier/notifier.h"
|
#include "orte/mca/notifier/notifier.h"
|
||||||
|
|
||||||
#include "ompi/constants.h"
|
#include "ompi/constants.h"
|
||||||
@ -443,7 +442,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
|||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
ret = opal_paffinity_base_slot_list_set((long)ORTE_PROC_MY_NAME->vpid, opal_paffinity_base_slot_list, &mask);
|
ret = opal_paffinity_base_slot_list_set((long)ORTE_PROC_MY_NAME->vpid, opal_paffinity_base_slot_list, &mask);
|
||||||
if (OPAL_SUCCESS != ret && OPAL_ERR_NOT_FOUND != ret) {
|
if (OPAL_SUCCESS != ret && OPAL_ERR_NOT_FOUND != OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
error = "opal_paffinity_base_slot_list_set() returned an error";
|
error = "opal_paffinity_base_slot_list_set() returned an error";
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
@ -746,7 +745,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
|||||||
/* If we got "unreachable", then print a specific error message.
|
/* If we got "unreachable", then print a specific error message.
|
||||||
Otherwise, if we got some other failure, fall through to print
|
Otherwise, if we got some other failure, fall through to print
|
||||||
a generic message. */
|
a generic message. */
|
||||||
if (OMPI_ERR_UNREACH == ret) {
|
if (OMPI_ERR_UNREACH == OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
orte_show_help("help-mpi-runtime",
|
orte_show_help("help-mpi-runtime",
|
||||||
"mpi_init:startup:pml-add-procs-fail", true);
|
"mpi_init:startup:pml-add-procs-fail", true);
|
||||||
error = NULL;
|
error = NULL;
|
||||||
|
@ -34,7 +34,7 @@ ompi_init_preconnect_mpi(void)
|
|||||||
char inbuf[1], outbuf[1];
|
char inbuf[1], outbuf[1];
|
||||||
|
|
||||||
param = mca_base_param_find("mpi", NULL, "preconnect_mpi");
|
param = mca_base_param_find("mpi", NULL, "preconnect_mpi");
|
||||||
if (OMPI_ERROR == param) return OMPI_SUCCESS;
|
if (OMPI_SUCCESS != param) return OMPI_SUCCESS;
|
||||||
ret = mca_base_param_lookup_int(param, &value);
|
ret = mca_base_param_lookup_int(param, &value);
|
||||||
if (OMPI_SUCCESS != ret || 0 == value) {
|
if (OMPI_SUCCESS != ret || 0 == value) {
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
|
@ -46,6 +46,7 @@
|
|||||||
#include "opal/mca/base/base.h"
|
#include "opal/mca/base/base.h"
|
||||||
#include "opal/util/cmd_line.h"
|
#include "opal/util/cmd_line.h"
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "orte/util/show_help.h"
|
#include "orte/util/show_help.h"
|
||||||
#include "opal/util/daemon_init.h"
|
#include "opal/util/daemon_init.h"
|
||||||
#include "opal/runtime/opal.h"
|
#include "opal/runtime/opal.h"
|
||||||
|
@ -383,7 +383,7 @@ void ompi_info_open_components(void)
|
|||||||
}
|
}
|
||||||
map = OBJ_NEW(ompi_info_component_map_t);
|
map = OBJ_NEW(ompi_info_component_map_t);
|
||||||
map->type = strdup("notifier");
|
map->type = strdup("notifier");
|
||||||
map->components = &mca_notifier_base_components_available;
|
map->components = &orte_notifier_base_components_available;
|
||||||
opal_pointer_array_add(&component_map, map);
|
opal_pointer_array_add(&component_map, map);
|
||||||
|
|
||||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||||
|
@ -500,6 +500,7 @@ void ompi_info_do_config(bool want_all)
|
|||||||
char *memprofile;
|
char *memprofile;
|
||||||
char *memdebug;
|
char *memdebug;
|
||||||
char *debug;
|
char *debug;
|
||||||
|
char *log_event;
|
||||||
char *mpi_interface_warning;
|
char *mpi_interface_warning;
|
||||||
char *cprofiling;
|
char *cprofiling;
|
||||||
char *cxxprofiling;
|
char *cxxprofiling;
|
||||||
@ -545,6 +546,7 @@ void ompi_info_do_config(bool want_all)
|
|||||||
memprofile = OPAL_ENABLE_MEM_PROFILE ? "yes" : "no";
|
memprofile = OPAL_ENABLE_MEM_PROFILE ? "yes" : "no";
|
||||||
memdebug = OPAL_ENABLE_MEM_DEBUG ? "yes" : "no";
|
memdebug = OPAL_ENABLE_MEM_DEBUG ? "yes" : "no";
|
||||||
debug = OPAL_ENABLE_DEBUG ? "yes" : "no";
|
debug = OPAL_ENABLE_DEBUG ? "yes" : "no";
|
||||||
|
log_event = ORTE_WANT_NOTIFIER_LOG_EVENT ? "yes" : "no";
|
||||||
mpi_interface_warning = OMPI_WANT_MPI_INTERFACE_WARNING ? "yes" : "no";
|
mpi_interface_warning = OMPI_WANT_MPI_INTERFACE_WARNING ? "yes" : "no";
|
||||||
cprofiling = OMPI_ENABLE_MPI_PROFILING ? "yes" : "no";
|
cprofiling = OMPI_ENABLE_MPI_PROFILING ? "yes" : "no";
|
||||||
cxxprofiling = (OMPI_WANT_CXX_BINDINGS && OMPI_ENABLE_MPI_PROFILING) ? "yes" : "no";
|
cxxprofiling = (OMPI_WANT_CXX_BINDINGS && OMPI_ENABLE_MPI_PROFILING) ? "yes" : "no";
|
||||||
@ -818,6 +820,7 @@ void ompi_info_do_config(bool want_all)
|
|||||||
ompi_info_out("MPI parameter check", "option:mpi-param-check", paramcheck);
|
ompi_info_out("MPI parameter check", "option:mpi-param-check", paramcheck);
|
||||||
ompi_info_out("Memory profiling support", "option:mem-profile", memprofile);
|
ompi_info_out("Memory profiling support", "option:mem-profile", memprofile);
|
||||||
ompi_info_out("Memory debugging support", "option:mem-debug", memdebug);
|
ompi_info_out("Memory debugging support", "option:mem-debug", memdebug);
|
||||||
|
ompi_info_out("Unusual events notif.", "option:log-event", log_event);
|
||||||
ompi_info_out("libltdl support", "option:dlopen", want_libltdl);
|
ompi_info_out("libltdl support", "option:dlopen", want_libltdl);
|
||||||
ompi_info_out("Heterogeneous support", "options:heterogeneous", heterogeneous);
|
ompi_info_out("Heterogeneous support", "options:heterogeneous", heterogeneous);
|
||||||
ompi_info_out("mpirun default --prefix", "mpirun:prefix_by_default",
|
ompi_info_out("mpirun default --prefix", "mpirun:prefix_by_default",
|
||||||
|
@ -48,9 +48,7 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#ifdef HAVE_UNISTD_H
|
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#endif
|
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#ifdef HAVE_FCNTL_H
|
#ifdef HAVE_FCNTL_H
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
#include "opal/util/argv.h"
|
#include "opal/util/argv.h"
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
#include "opal/util/show_help.h"
|
#include "opal/util/show_help.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/mca/mca.h"
|
#include "opal/mca/mca.h"
|
||||||
#include "opal/mca/base/base.h"
|
#include "opal/mca/base/base.h"
|
||||||
#include "opal/mca/base/mca_base_component_repository.h"
|
#include "opal/mca/base/mca_base_component_repository.h"
|
||||||
@ -296,7 +297,7 @@ static int parse_requested(int mca_param, bool *include_mode,
|
|||||||
|
|
||||||
/* See if the user requested anything */
|
/* See if the user requested anything */
|
||||||
|
|
||||||
if (OPAL_ERROR == mca_base_param_lookup_string(mca_param, &requested)) {
|
if (OPAL_SUCCESS != mca_base_param_lookup_string(mca_param, &requested)) {
|
||||||
return OPAL_ERROR;
|
return OPAL_ERROR;
|
||||||
}
|
}
|
||||||
if (NULL == requested || 0 == strlen(requested)) {
|
if (NULL == requested || 0 == strlen(requested)) {
|
||||||
@ -391,7 +392,7 @@ static int open_components(const char *type_name, int output_id,
|
|||||||
"mca: base: components_open: "
|
"mca: base: components_open: "
|
||||||
"component %s register function successful",
|
"component %s register function successful",
|
||||||
component->mca_component_name);
|
component->mca_component_name);
|
||||||
} else if (OPAL_ERR_NOT_AVAILABLE != ret) {
|
} else if (OPAL_ERR_NOT_AVAILABLE != OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
/* If the component returns OPAL_ERR_NOT_AVAILABLE,
|
/* If the component returns OPAL_ERR_NOT_AVAILABLE,
|
||||||
it's a cue to "silently ignore me" -- it's not a
|
it's a cue to "silently ignore me" -- it's not a
|
||||||
failure, it's just a way for the component to say
|
failure, it's just a way for the component to say
|
||||||
@ -431,7 +432,7 @@ static int open_components(const char *type_name, int output_id,
|
|||||||
"mca: base: components_open: "
|
"mca: base: components_open: "
|
||||||
"component %s open function successful",
|
"component %s open function successful",
|
||||||
component->mca_component_name);
|
component->mca_component_name);
|
||||||
} else if (OPAL_ERR_NOT_AVAILABLE != ret) {
|
} else if (OPAL_ERR_NOT_AVAILABLE != OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
/* If the component returns OPAL_ERR_NOT_AVAILABLE,
|
/* If the component returns OPAL_ERR_NOT_AVAILABLE,
|
||||||
it's a cue to "silently ignore me" -- it's not a
|
it's a cue to "silently ignore me" -- it's not a
|
||||||
failure, it's just a way for the component to say
|
failure, it's just a way for the component to say
|
||||||
@ -482,7 +483,7 @@ static int open_components(const char *type_name, int output_id,
|
|||||||
opened_components list */
|
opened_components list */
|
||||||
|
|
||||||
else {
|
else {
|
||||||
if (OPAL_ERROR == mca_base_param_find(type_name,
|
if (OPAL_SUCCESS != mca_base_param_find(type_name,
|
||||||
component->mca_component_name,
|
component->mca_component_name,
|
||||||
"priority")) {
|
"priority")) {
|
||||||
mca_base_param_register_int(type_name,
|
mca_base_param_register_int(type_name,
|
||||||
|
@ -630,7 +630,7 @@ char *mca_base_param_environ_variable(const char *type,
|
|||||||
}
|
}
|
||||||
|
|
||||||
id = mca_base_param_find(type, component, param);
|
id = mca_base_param_find(type, component, param);
|
||||||
if (OPAL_ERROR != id) {
|
if (OPAL_SUCCESS == id) {
|
||||||
array = OPAL_VALUE_ARRAY_GET_BASE(&mca_base_params, mca_base_param_t);
|
array = OPAL_VALUE_ARRAY_GET_BASE(&mca_base_params, mca_base_param_t);
|
||||||
ret = strdup(array[id].mbp_env_var_name);
|
ret = strdup(array[id].mbp_env_var_name);
|
||||||
} else {
|
} else {
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#include "opal/mca/crs/crs.h"
|
#include "opal/mca/crs/crs.h"
|
||||||
#include "opal/mca/crs/base/base.h"
|
#include "opal/mca/crs/base/base.h"
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "opal/mca/crs/base/static-components.h"
|
#include "opal/mca/crs/base/static-components.h"
|
||||||
|
|
||||||
@ -97,7 +98,7 @@ int opal_crs_base_open(void)
|
|||||||
mca_crs_base_static_components,
|
mca_crs_base_static_components,
|
||||||
&opal_crs_base_components_available,
|
&opal_crs_base_components_available,
|
||||||
true)) ) {
|
true)) ) {
|
||||||
if( OPAL_ERR_NOT_FOUND == ret &&
|
if( OPAL_ERR_NOT_FOUND == OPAL_SOS_GET_ERROR_CODE(ret) &&
|
||||||
NULL != str_value &&
|
NULL != str_value &&
|
||||||
0 == strncmp(str_value, "none", strlen("none")) ) {
|
0 == strncmp(str_value, "none", strlen("none")) ) {
|
||||||
exit_status = OPAL_SUCCESS;
|
exit_status = OPAL_SUCCESS;
|
||||||
|
@ -53,6 +53,7 @@
|
|||||||
#include "opal/util/stacktrace.h"
|
#include "opal/util/stacktrace.h"
|
||||||
#include "opal/util/keyval_parse.h"
|
#include "opal/util/keyval_parse.h"
|
||||||
#include "opal/util/sys_limits.h"
|
#include "opal/util/sys_limits.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#if OPAL_CC_USE_PRAGMA_IDENT
|
#if OPAL_CC_USE_PRAGMA_IDENT
|
||||||
#pragma ident OPAL_IDENT_STRING
|
#pragma ident OPAL_IDENT_STRING
|
||||||
@ -71,7 +72,7 @@ opal_err2str(int errnum)
|
|||||||
{
|
{
|
||||||
const char *retval;
|
const char *retval;
|
||||||
|
|
||||||
switch (errnum) {
|
switch (OPAL_SOS_GET_ERROR_CODE(errnum)) {
|
||||||
case OPAL_SUCCESS:
|
case OPAL_SUCCESS:
|
||||||
retval = "Success";
|
retval = "Success";
|
||||||
break;
|
break;
|
||||||
@ -223,6 +224,9 @@ opal_init_util(int* pargc, char*** pargv)
|
|||||||
/* initialize the memory allocator */
|
/* initialize the memory allocator */
|
||||||
opal_malloc_init();
|
opal_malloc_init();
|
||||||
|
|
||||||
|
/* initialize the OPAL SOS system */
|
||||||
|
opal_sos_init();
|
||||||
|
|
||||||
/* initialize the output system */
|
/* initialize the output system */
|
||||||
opal_output_init();
|
opal_output_init();
|
||||||
|
|
||||||
|
@ -161,7 +161,7 @@ static char **opal_argv_split_inter(const char *src_string, int delimiter,
|
|||||||
if (src_string == p) {
|
if (src_string == p) {
|
||||||
if (include_empty) {
|
if (include_empty) {
|
||||||
arg[0] = '\0';
|
arg[0] = '\0';
|
||||||
if (OPAL_ERROR == opal_argv_append(&argc, &argv, arg))
|
if (OPAL_SUCCESS != opal_argv_append(&argc, &argv, arg))
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -169,7 +169,7 @@ static char **opal_argv_split_inter(const char *src_string, int delimiter,
|
|||||||
/* tail argument, add straight from the original string */
|
/* tail argument, add straight from the original string */
|
||||||
|
|
||||||
else if ('\0' == *p) {
|
else if ('\0' == *p) {
|
||||||
if (OPAL_ERROR == opal_argv_append(&argc, &argv, src_string))
|
if (OPAL_SUCCESS != opal_argv_append(&argc, &argv, src_string))
|
||||||
return NULL;
|
return NULL;
|
||||||
src_string = p;
|
src_string = p;
|
||||||
continue;
|
continue;
|
||||||
@ -185,7 +185,7 @@ static char **opal_argv_split_inter(const char *src_string, int delimiter,
|
|||||||
strncpy(argtemp, src_string, arglen);
|
strncpy(argtemp, src_string, arglen);
|
||||||
argtemp[arglen] = '\0';
|
argtemp[arglen] = '\0';
|
||||||
|
|
||||||
if (OPAL_ERROR == opal_argv_append(&argc, &argv, argtemp)) {
|
if (OPAL_SUCCESS != opal_argv_append(&argc, &argv, argtemp)) {
|
||||||
free(argtemp);
|
free(argtemp);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@ -199,7 +199,7 @@ static char **opal_argv_split_inter(const char *src_string, int delimiter,
|
|||||||
strncpy(arg, src_string, arglen);
|
strncpy(arg, src_string, arglen);
|
||||||
arg[arglen] = '\0';
|
arg[arglen] = '\0';
|
||||||
|
|
||||||
if (OPAL_ERROR == opal_argv_append(&argc, &argv, arg))
|
if (OPAL_SUCCESS != opal_argv_append(&argc, &argv, arg))
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -389,7 +389,7 @@ char **opal_argv_copy(char **argv)
|
|||||||
dupv[0] = NULL;
|
dupv[0] = NULL;
|
||||||
|
|
||||||
while (NULL != *argv) {
|
while (NULL != *argv) {
|
||||||
if (OPAL_ERROR == opal_argv_append(&dupc, &dupv, *argv)) {
|
if (OPAL_SUCCESS != opal_argv_append(&dupc, &dupv, *argv)) {
|
||||||
opal_argv_free(dupv);
|
opal_argv_free(dupv);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -30,6 +30,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "opal/util/error.h"
|
#include "opal/util/error.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/constants.h"
|
#include "opal/constants.h"
|
||||||
|
|
||||||
#define MAX_CONVERTERS 5
|
#define MAX_CONVERTERS 5
|
||||||
@ -94,12 +95,12 @@ opal_perror(int errnum, const char *msg)
|
|||||||
{
|
{
|
||||||
const char* errmsg = opal_strerror_int(errnum);
|
const char* errmsg = opal_strerror_int(errnum);
|
||||||
|
|
||||||
if (NULL != msg && errnum != OPAL_ERR_IN_ERRNO) {
|
if (NULL != msg && OPAL_SOS_GET_ERROR_CODE(errnum) != OPAL_ERR_IN_ERRNO) {
|
||||||
fprintf(stderr, "%s: ", msg);
|
fprintf(stderr, "%s: ", msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (NULL == errmsg) {
|
if (NULL == errmsg) {
|
||||||
if (errnum == OPAL_ERR_IN_ERRNO) {
|
if (OPAL_SOS_GET_ERROR_CODE(errnum) == OPAL_ERR_IN_ERRNO) {
|
||||||
perror(msg);
|
perror(msg);
|
||||||
} else {
|
} else {
|
||||||
char *ue_msg = opal_strerror_unknown(errnum);
|
char *ue_msg = opal_strerror_unknown(errnum);
|
||||||
@ -147,7 +148,7 @@ opal_strerror_r(int errnum, char *strerrbuf, size_t buflen)
|
|||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (NULL == errmsg) {
|
if (NULL == errmsg) {
|
||||||
if (errnum == OPAL_ERR_IN_ERRNO) {
|
if (OPAL_SOS_GET_ERROR_CODE(errnum) == OPAL_ERR_IN_ERRNO) {
|
||||||
char *tmp = strerror(errno);
|
char *tmp = strerror(errno);
|
||||||
strncpy(strerrbuf, tmp, buflen);
|
strncpy(strerrbuf, tmp, buflen);
|
||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
|
@ -29,6 +29,7 @@
|
|||||||
|
|
||||||
#include "opal/util/trace.h"
|
#include "opal/util/trace.h"
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "orte/util/name_fns.h"
|
#include "orte/util/name_fns.h"
|
||||||
#include "orte/util/session_dir.h"
|
#include "orte/util/session_dir.h"
|
||||||
@ -48,7 +49,7 @@ void orte_errmgr_base_log(int error_code, char *filename, int line)
|
|||||||
{
|
{
|
||||||
OPAL_TRACE(1);
|
OPAL_TRACE(1);
|
||||||
|
|
||||||
if (ORTE_ERR_SILENT == error_code) {
|
if (ORTE_ERR_SILENT == OPAL_SOS_GET_ERROR_CODE(error_code)) {
|
||||||
/* if the error is silent, say nothing */
|
/* if the error is silent, say nothing */
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/dss/dss.h"
|
#include "opal/dss/dss.h"
|
||||||
|
|
||||||
#include "orte/util/error_strings.h"
|
#include "orte/util/error_strings.h"
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/dss/dss.h"
|
#include "opal/dss/dss.h"
|
||||||
|
|
||||||
#include "orte/util/error_strings.h"
|
#include "orte/util/error_strings.h"
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
|
||||||
#include "opal/util/opal_environ.h"
|
#include "opal/util/opal_environ.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/mca/base/mca_base_param.h"
|
#include "opal/mca/base/mca_base_param.h"
|
||||||
|
|
||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
|
||||||
#include "opal/util/opal_environ.h"
|
#include "opal/util/opal_environ.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/mca/base/mca_base_param.h"
|
#include "opal/mca/base/mca_base_param.h"
|
||||||
|
|
||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
|
@ -28,6 +28,8 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
#include "orte/util/show_help.h"
|
#include "orte/util/show_help.h"
|
||||||
#include "orte/runtime/orte_wait.h"
|
#include "orte/runtime/orte_wait.h"
|
||||||
|
@ -29,7 +29,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "opal/event/event.h"
|
#include "opal/event/event.h"
|
||||||
#include "orte/util/show_help.h"
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/runtime/opal.h"
|
#include "opal/runtime/opal.h"
|
||||||
#include "opal/runtime/opal_cr.h"
|
#include "opal/runtime/opal_cr.h"
|
||||||
|
|
||||||
|
@ -29,6 +29,7 @@
|
|||||||
|
|
||||||
#include "opal/util/argv.h"
|
#include "opal/util/argv.h"
|
||||||
#include "opal/util/if.h"
|
#include "opal/util/if.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/mca/paffinity/paffinity.h"
|
#include "opal/mca/paffinity/paffinity.h"
|
||||||
#include "opal/mca/sysinfo/sysinfo.h"
|
#include "opal/mca/sysinfo/sysinfo.h"
|
||||||
#include "opal/mca/sysinfo/base/base.h"
|
#include "opal/mca/sysinfo/base/base.h"
|
||||||
|
1
orte/mca/ess/env/ess_env_module.c
поставляемый
1
orte/mca/ess/env/ess_env_module.c
поставляемый
@ -39,6 +39,7 @@
|
|||||||
#include "opal/mca/base/base.h"
|
#include "opal/mca/base/base.h"
|
||||||
#include "opal/mca/base/mca_base_param.h"
|
#include "opal/mca/base/mca_base_param.h"
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/util/malloc.h"
|
#include "opal/util/malloc.h"
|
||||||
#include "opal/util/argv.h"
|
#include "opal/util/argv.h"
|
||||||
|
|
||||||
|
@ -336,7 +336,7 @@ static int rte_init(void)
|
|||||||
/* setup the orte_show_help system to recv remote output */
|
/* setup the orte_show_help system to recv remote output */
|
||||||
ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_SHOW_HELP,
|
ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_SHOW_HELP,
|
||||||
ORTE_RML_NON_PERSISTENT, orte_show_help_recv, NULL);
|
ORTE_RML_NON_PERSISTENT, orte_show_help_recv, NULL);
|
||||||
if (ret != ORTE_SUCCESS && ret != ORTE_ERR_NOT_IMPLEMENTED) {
|
if (ret != ORTE_SUCCESS && OPAL_SOS_GET_ERROR_CODE(ret) != ORTE_ERR_NOT_IMPLEMENTED) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "setup receive for orte_show_help";
|
error = "setup receive for orte_show_help";
|
||||||
goto error;
|
goto error;
|
||||||
@ -586,7 +586,7 @@ static int rte_init(void)
|
|||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
|
|
||||||
error:
|
error:
|
||||||
if (ORTE_ERR_SILENT != ret) {
|
if (ORTE_ERR_SILENT != OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||||
orte_show_help("help-orte-runtime.txt",
|
orte_show_help("help-orte-runtime.txt",
|
||||||
"orte_init:startup:internal-failure",
|
"orte_init:startup:internal-failure",
|
||||||
true, error, ORTE_ERROR_NAME(ret), ret);
|
true, error, ORTE_ERROR_NAME(ret), ret);
|
||||||
|
@ -34,6 +34,7 @@
|
|||||||
|
|
||||||
#include "opal/util/argv.h"
|
#include "opal/util/argv.h"
|
||||||
#include "opal/util/path.h"
|
#include "opal/util/path.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/mca/base/mca_base_param.h"
|
#include "opal/mca/base/mca_base_param.h"
|
||||||
#include "opal/mca/installdirs/installdirs.h"
|
#include "opal/mca/installdirs/installdirs.h"
|
||||||
#include "opal/mca/paffinity/paffinity.h"
|
#include "opal/mca/paffinity/paffinity.h"
|
||||||
|
@ -38,6 +38,7 @@
|
|||||||
#include "opal/mca/base/base.h"
|
#include "opal/mca/base/base.h"
|
||||||
#include "opal/mca/base/mca_base_param.h"
|
#include "opal/mca/base/mca_base_param.h"
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/util/malloc.h"
|
#include "opal/util/malloc.h"
|
||||||
|
|
||||||
#include "orte/mca/rml/base/base.h"
|
#include "orte/mca/rml/base/base.h"
|
||||||
|
@ -32,6 +32,7 @@
|
|||||||
|
|
||||||
#include "opal/util/opal_environ.h"
|
#include "opal/util/opal_environ.h"
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/mca/base/mca_base_param.h"
|
#include "opal/mca/base/mca_base_param.h"
|
||||||
#include "opal/util/argv.h"
|
#include "opal/util/argv.h"
|
||||||
#include "opal/class/opal_pointer_array.h"
|
#include "opal/class/opal_pointer_array.h"
|
||||||
|
@ -38,6 +38,7 @@
|
|||||||
|
|
||||||
#include "opal/util/opal_environ.h"
|
#include "opal/util/opal_environ.h"
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/mca/base/mca_base_param.h"
|
#include "opal/mca/base/mca_base_param.h"
|
||||||
#include "opal/util/argv.h"
|
#include "opal/util/argv.h"
|
||||||
#include "opal/class/opal_pointer_array.h"
|
#include "opal/class/opal_pointer_array.h"
|
||||||
@ -400,7 +401,8 @@ static int rte_finalize(void)
|
|||||||
|
|
||||||
static void rte_abort(int error_code, bool report)
|
static void rte_abort(int error_code, bool report)
|
||||||
{
|
{
|
||||||
if (ORTE_ERR_SOCKET_NOT_AVAILABLE == error_code && slurm20) {
|
if (ORTE_ERR_SOCKET_NOT_AVAILABLE == OPAL_SOS_GET_ERROR_CODE(error_code) &&
|
||||||
|
slurm20) {
|
||||||
/* exit silently with a special error code for slurm 2.0 */
|
/* exit silently with a special error code for slurm 2.0 */
|
||||||
orte_ess_base_app_abort(108, false);
|
orte_ess_base_app_abort(108, false);
|
||||||
} else {
|
} else {
|
||||||
|
@ -29,6 +29,7 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "orte/util/show_help.h"
|
#include "orte/util/show_help.h"
|
||||||
#include "orte/mca/plm/base/base.h"
|
#include "orte/mca/plm/base/base.h"
|
||||||
|
@ -29,6 +29,7 @@
|
|||||||
|
|
||||||
#include "orte/constants.h"
|
#include "orte/constants.h"
|
||||||
|
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/mca/mca.h"
|
#include "opal/mca/mca.h"
|
||||||
#include "opal/mca/base/base.h"
|
#include "opal/mca/base/base.h"
|
||||||
|
|
||||||
|
@ -40,6 +40,7 @@
|
|||||||
|
|
||||||
#include "opal/mca/mca.h"
|
#include "opal/mca/mca.h"
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/mca/base/mca_base_param.h"
|
#include "opal/mca/base/mca_base_param.h"
|
||||||
|
|
||||||
#include "opal/dss/dss.h"
|
#include "opal/dss/dss.h"
|
||||||
|
@ -36,6 +36,7 @@
|
|||||||
#include "opal/mca/base/mca_base_param.h"
|
#include "opal/mca/base/mca_base_param.h"
|
||||||
|
|
||||||
#include "opal/event/event.h"
|
#include "opal/event/event.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "orte/constants.h"
|
#include "orte/constants.h"
|
||||||
#include "orte/util/show_help.h"
|
#include "orte/util/show_help.h"
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
|
|
||||||
#include "orte/util/proc_info.h"
|
#include "orte/util/proc_info.h"
|
||||||
#include "opal/dss/dss.h"
|
#include "opal/dss/dss.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
#include "orte/mca/odls/base/base.h"
|
#include "orte/mca/odls/base/base.h"
|
||||||
#include "orte/mca/odls/odls_types.h"
|
#include "orte/mca/odls/odls_types.h"
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
#endif /* HAVE_SYS_TIME_H */
|
#endif /* HAVE_SYS_TIME_H */
|
||||||
|
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "opal/dss/dss.h"
|
#include "opal/dss/dss.h"
|
||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
@ -67,7 +68,7 @@ static void allgather_server_recv(int status, orte_process_name_t* sender,
|
|||||||
/* reissue the recv */
|
/* reissue the recv */
|
||||||
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER_LIST,
|
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER_LIST,
|
||||||
ORTE_RML_NON_PERSISTENT, allgather_server_recv, NULL);
|
ORTE_RML_NON_PERSISTENT, allgather_server_recv, NULL);
|
||||||
if (rc != ORTE_SUCCESS && rc != ORTE_ERR_NOT_IMPLEMENTED) {
|
if (rc != ORTE_SUCCESS && OPAL_SOS_GET_ERROR_CODE(rc) != ORTE_ERR_NOT_IMPLEMENTED) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
allgather_failed = true;
|
allgather_failed = true;
|
||||||
}
|
}
|
||||||
@ -147,7 +148,7 @@ int orte_grpcomm_base_allgather_list(opal_list_t *names, opal_buffer_t *sbuf, op
|
|||||||
allgather_failed = false;
|
allgather_failed = false;
|
||||||
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER_LIST,
|
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER_LIST,
|
||||||
ORTE_RML_NON_PERSISTENT, allgather_client_recv, NULL);
|
ORTE_RML_NON_PERSISTENT, allgather_client_recv, NULL);
|
||||||
if (rc != ORTE_SUCCESS && rc != ORTE_ERR_NOT_IMPLEMENTED) {
|
if (rc != ORTE_SUCCESS && OPAL_SOS_GET_ERROR_CODE(rc) != ORTE_ERR_NOT_IMPLEMENTED) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
@ -208,7 +209,7 @@ int orte_grpcomm_base_allgather_list(opal_list_t *names, opal_buffer_t *sbuf, op
|
|||||||
/* post the non-blocking recv */
|
/* post the non-blocking recv */
|
||||||
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER_LIST,
|
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER_LIST,
|
||||||
ORTE_RML_NON_PERSISTENT, allgather_server_recv, NULL);
|
ORTE_RML_NON_PERSISTENT, allgather_server_recv, NULL);
|
||||||
if (rc != ORTE_SUCCESS && rc != ORTE_ERR_NOT_IMPLEMENTED) {
|
if (rc != ORTE_SUCCESS && OPAL_SOS_GET_ERROR_CODE(rc) != ORTE_ERR_NOT_IMPLEMENTED) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
@ -29,6 +29,7 @@
|
|||||||
#endif /* HAVE_SYS_TIME_H */
|
#endif /* HAVE_SYS_TIME_H */
|
||||||
|
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/class/opal_hash_table.h"
|
#include "opal/class/opal_hash_table.h"
|
||||||
#include "opal/dss/dss.h"
|
#include "opal/dss/dss.h"
|
||||||
#include "opal/threads/mutex.h"
|
#include "opal/threads/mutex.h"
|
||||||
|
@ -33,6 +33,7 @@
|
|||||||
#endif /* HAVE_SYS_TIME_H */
|
#endif /* HAVE_SYS_TIME_H */
|
||||||
|
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/dss/dss.h"
|
#include "opal/dss/dss.h"
|
||||||
|
|
||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
|
@ -29,6 +29,7 @@
|
|||||||
|
|
||||||
#include "opal/threads/condition.h"
|
#include "opal/threads/condition.h"
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/class/opal_hash_table.h"
|
#include "opal/class/opal_hash_table.h"
|
||||||
#include "opal/dss/dss.h"
|
#include "opal/dss/dss.h"
|
||||||
|
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
|
|
||||||
#include "opal/dss/dss.h"
|
#include "opal/dss/dss.h"
|
||||||
#include "opal/runtime/opal.h"
|
#include "opal/runtime/opal.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
#include "orte/mca/ess/ess.h"
|
#include "orte/mca/ess/ess.h"
|
||||||
|
@ -25,6 +25,7 @@
|
|||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
|
|
||||||
#include "opal/dss/dss.h"
|
#include "opal/dss/dss.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/runtime/opal.h"
|
#include "opal/runtime/opal.h"
|
||||||
#include "opal/mca/paffinity/paffinity.h"
|
#include "opal/mca/paffinity/paffinity.h"
|
||||||
|
|
||||||
|
@ -39,6 +39,7 @@
|
|||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "orte/util/name_fns.h"
|
#include "orte/util/name_fns.h"
|
||||||
#include "orte/runtime/orte_globals.h"
|
#include "orte/runtime/orte_globals.h"
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
|
|
||||||
#include "opal/mca/mca.h"
|
#include "opal/mca/mca.h"
|
||||||
#include "opal/mca/base/base.h"
|
#include "opal/mca/base/base.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "orte/util/proc_info.h"
|
#include "orte/util/proc_info.h"
|
||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
|
@ -57,6 +57,7 @@
|
|||||||
|
|
||||||
#include "opal/util/opal_pty.h"
|
#include "opal/util/opal_pty.h"
|
||||||
#include "opal/util/opal_environ.h"
|
#include "opal/util/opal_environ.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
|
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "orte/runtime/orte_globals.h"
|
#include "orte/runtime/orte_globals.h"
|
||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#endif /* HAVE_STRING_H */
|
#endif /* HAVE_STRING_H */
|
||||||
|
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/dss/dss.h"
|
#include "opal/dss/dss.h"
|
||||||
|
|
||||||
#include "orte/mca/rml/rml_types.h"
|
#include "orte/mca/rml/rml_types.h"
|
||||||
@ -184,7 +185,7 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata)
|
|||||||
*/
|
*/
|
||||||
if( ORTE_SUCCESS != (rc = orte_iof_hnp_send_data_to_endpoint(&sink->daemon, &sink->name, ORTE_IOF_STDIN, data, numbytes))) {
|
if( ORTE_SUCCESS != (rc = orte_iof_hnp_send_data_to_endpoint(&sink->daemon, &sink->name, ORTE_IOF_STDIN, data, numbytes))) {
|
||||||
/* if the addressee is unknown, remove the sink from the list */
|
/* if the addressee is unknown, remove the sink from the list */
|
||||||
if( ORTE_ERR_ADDRESSEE_UNKNOWN == rc ) {
|
if( ORTE_ERR_ADDRESSEE_UNKNOWN == OPAL_SOS_GET_ERROR_CODE(rc) ) {
|
||||||
prev_item = opal_list_get_prev(item);
|
prev_item = opal_list_get_prev(item);
|
||||||
opal_list_remove_item(&mca_iof_hnp_component.sinks, item);
|
opal_list_remove_item(&mca_iof_hnp_component.sinks, item);
|
||||||
OBJ_RELEASE(item);
|
OBJ_RELEASE(item);
|
||||||
|
@ -35,6 +35,7 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "orte/mca/rml/rml.h"
|
#include "orte/mca/rml/rml.h"
|
||||||
#include "orte/mca/rml/rml_types.h"
|
#include "orte/mca/rml/rml_types.h"
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#endif /* HAVE_STRING_H */
|
#endif /* HAVE_STRING_H */
|
||||||
|
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "orte/mca/rml/rml.h"
|
#include "orte/mca/rml/rml.h"
|
||||||
#include "orte/mca/rml/rml_types.h"
|
#include "orte/mca/rml/rml_types.h"
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
#include "orte/util/name_fns.h"
|
#include "orte/util/name_fns.h"
|
||||||
|
@ -29,6 +29,7 @@
|
|||||||
#endif /* HAVE_STRING_H */
|
#endif /* HAVE_STRING_H */
|
||||||
|
|
||||||
#include "opal/dss/dss.h"
|
#include "opal/dss/dss.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "orte/mca/rml/rml.h"
|
#include "orte/mca/rml/rml.h"
|
||||||
#include "orte/mca/rml/rml_types.h"
|
#include "orte/mca/rml/rml_types.h"
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#endif /* HAVE_STRING_H */
|
#endif /* HAVE_STRING_H */
|
||||||
|
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "orte/mca/rml/rml.h"
|
#include "orte/mca/rml/rml.h"
|
||||||
#include "orte/mca/rml/rml_types.h"
|
#include "orte/mca/rml/rml_types.h"
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#endif /* HAVE_STRING_H */
|
#endif /* HAVE_STRING_H */
|
||||||
|
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "orte/mca/rml/rml.h"
|
#include "orte/mca/rml/rml.h"
|
||||||
#include "orte/mca/rml/rml_types.h"
|
#include "orte/mca/rml/rml_types.h"
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#endif /* HAVE_STRING_H */
|
#endif /* HAVE_STRING_H */
|
||||||
|
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "orte/mca/rml/rml.h"
|
#include "orte/mca/rml/rml.h"
|
||||||
#include "orte/mca/rml/rml_types.h"
|
#include "orte/mca/rml/rml_types.h"
|
||||||
|
@ -40,6 +40,7 @@
|
|||||||
|
|
||||||
#include "opal/util/opal_environ.h"
|
#include "opal/util/opal_environ.h"
|
||||||
#include "opal/util/argv.h"
|
#include "opal/util/argv.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/util/os_path.h"
|
#include "opal/util/os_path.h"
|
||||||
#include "opal/util/sys_limits.h"
|
#include "opal/util/sys_limits.h"
|
||||||
#include "opal/dss/dss.h"
|
#include "opal/dss/dss.h"
|
||||||
@ -738,7 +739,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
|
|||||||
/* if the buffer was empty, then we know that all we are doing is
|
/* if the buffer was empty, then we know that all we are doing is
|
||||||
* launching debugger daemons
|
* launching debugger daemons
|
||||||
*/
|
*/
|
||||||
if (ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER == rc) {
|
if (ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER == OPAL_SOS_GET_ERROR_CODE(rc)) {
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
*job = ORTE_JOBID_INVALID;
|
*job = ORTE_JOBID_INVALID;
|
||||||
@ -1970,7 +1971,7 @@ int orte_odls_base_default_deliver_message(orte_jobid_t job, opal_buffer_t *buff
|
|||||||
|
|
||||||
/* if so, send the message */
|
/* if so, send the message */
|
||||||
rc = orte_rml.send_buffer(child->name, buffer, tag, 0);
|
rc = orte_rml.send_buffer(child->name, buffer, tag, 0);
|
||||||
if (rc < 0 && rc != ORTE_ERR_ADDRESSEE_UNKNOWN) {
|
if (rc < 0 && OPAL_SOS_GET_ERROR_CODE(rc) != ORTE_ERR_ADDRESSEE_UNKNOWN) {
|
||||||
/* ignore if the addressee is unknown as a race condition could
|
/* ignore if the addressee is unknown as a race condition could
|
||||||
* have allowed the child to exit before we send it a barrier
|
* have allowed the child to exit before we send it a barrier
|
||||||
* due to the vagaries of the event library
|
* due to the vagaries of the event library
|
||||||
@ -2817,9 +2818,10 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
|
|||||||
* the child object from our local list
|
* the child object from our local list
|
||||||
*/
|
*/
|
||||||
if (child->iof_complete && child->waitpid_recvd) {
|
if (child->iof_complete && child->waitpid_recvd) {
|
||||||
if (ORTE_ERR_SILENT == orte_errmgr.update_state(ORTE_JOBID_INVALID, ORTE_JOB_STATE_UNDEF,
|
rc = orte_errmgr.update_state(ORTE_JOBID_INVALID, ORTE_JOB_STATE_UNDEF,
|
||||||
child->name, child->state,
|
child->name, child->state,
|
||||||
child->exit_code)) {
|
child->exit_code);
|
||||||
|
if (ORTE_ERR_SILENT == OPAL_SOS_GET_ERROR_CODE(rc)) {
|
||||||
/* all procs are complete - we are done */
|
/* all procs are complete - we are done */
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -2839,7 +2841,7 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
|
|||||||
opal_condition_signal(&orte_odls_globals.cond);
|
opal_condition_signal(&orte_odls_globals.cond);
|
||||||
OPAL_THREAD_UNLOCK(&orte_odls_globals.mutex);
|
OPAL_THREAD_UNLOCK(&orte_odls_globals.mutex);
|
||||||
|
|
||||||
return rc;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
int orte_odls_base_get_proc_stats(opal_buffer_t *answer,
|
int orte_odls_base_get_proc_stats(opal_buffer_t *answer,
|
||||||
|
@ -35,6 +35,7 @@
|
|||||||
#include "orte/util/show_help.h"
|
#include "orte/util/show_help.h"
|
||||||
#include "orte/runtime/orte_globals.h"
|
#include "orte/runtime/orte_globals.h"
|
||||||
#include "opal/util/basename.h"
|
#include "opal/util/basename.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "orte/util/name_fns.h"
|
#include "orte/util/name_fns.h"
|
||||||
#include "orte/util/proc_info.h"
|
#include "orte/util/proc_info.h"
|
||||||
|
@ -73,6 +73,7 @@
|
|||||||
#include "opal/mca/paffinity/base/base.h"
|
#include "opal/mca/paffinity/base/base.h"
|
||||||
#include "opal/class/opal_pointer_array.h"
|
#include "opal/class/opal_pointer_array.h"
|
||||||
#include "opal/util/opal_environ.h"
|
#include "opal/util/opal_environ.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "orte/util/show_help.h"
|
#include "orte/util/show_help.h"
|
||||||
#include "orte/runtime/orte_wait.h"
|
#include "orte/runtime/orte_wait.h"
|
||||||
@ -352,6 +353,17 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
|||||||
ORTE_NAME_PRINT(child->name), child->slot_list);
|
ORTE_NAME_PRINT(child->name), child->slot_list);
|
||||||
}
|
}
|
||||||
if (ORTE_SUCCESS != (rc = opal_paffinity_base_slot_list_set((long)child->name->vpid, child->slot_list, &mask))) {
|
if (ORTE_SUCCESS != (rc = opal_paffinity_base_slot_list_set((long)child->name->vpid, child->slot_list, &mask))) {
|
||||||
|
if (ORTE_ERR_NOT_SUPPORTED == OPAL_SOS_GET_ERROR_CODE(rc)) {
|
||||||
|
/* OS doesn't support providing topology information */
|
||||||
|
orte_show_help("help-odls-default.txt",
|
||||||
|
"odls-default:topo-not-supported",
|
||||||
|
true, orte_process_info.nodename, "rankfile containing a slot_list of ",
|
||||||
|
child->slot_list, context->app);
|
||||||
|
ORTE_ODLS_ERROR_OUT(rc);
|
||||||
|
}
|
||||||
|
|
||||||
|
orte_show_help("help-odls-default.txt",
|
||||||
|
"odls-default:slot-list-failed", true, child->slot_list, ORTE_ERROR_NAME(rc));
|
||||||
ORTE_ODLS_ERROR_OUT(rc);
|
ORTE_ODLS_ERROR_OUT(rc);
|
||||||
}
|
}
|
||||||
/* if we didn't wind up bound, then generate a warning unless suppressed */
|
/* if we didn't wind up bound, then generate a warning unless suppressed */
|
||||||
@ -403,7 +415,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
target_socket = opal_paffinity_base_get_physical_socket_id(logical_skt);
|
target_socket = opal_paffinity_base_get_physical_socket_id(logical_skt);
|
||||||
if (target_socket < 0) {
|
if (ORTE_ERR_NOT_SUPPORTED == OPAL_SOS_GET_ERROR_CODE(target_socket)) {
|
||||||
/* OS doesn't support providing topology information */
|
/* OS doesn't support providing topology information */
|
||||||
ORTE_ODLS_IF_BIND_NOT_REQD(5);
|
ORTE_ODLS_IF_BIND_NOT_REQD(5);
|
||||||
ORTE_ODLS_ERROR_OUT(target_socket);
|
ORTE_ODLS_ERROR_OUT(target_socket);
|
||||||
@ -427,7 +439,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
|||||||
* from when we initialized
|
* from when we initialized
|
||||||
*/
|
*/
|
||||||
target_socket = opal_paffinity_base_get_physical_socket_id(lrank % orte_odls_globals.num_sockets);
|
target_socket = opal_paffinity_base_get_physical_socket_id(lrank % orte_odls_globals.num_sockets);
|
||||||
if (target_socket < 0) {
|
if (ORTE_ERR_NOT_SUPPORTED == OPAL_SOS_GET_ERROR_CODE(target_socket)) {
|
||||||
/* OS does not support providing topology information */
|
/* OS does not support providing topology information */
|
||||||
ORTE_ODLS_IF_BIND_NOT_REQD(5);
|
ORTE_ODLS_IF_BIND_NOT_REQD(5);
|
||||||
ORTE_ODLS_ERROR_OUT(target_socket);
|
ORTE_ODLS_ERROR_OUT(target_socket);
|
||||||
@ -504,7 +516,8 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
|||||||
* physical cpu
|
* physical cpu
|
||||||
*/
|
*/
|
||||||
phys_cpu = opal_paffinity_base_get_physical_processor_id(logical_cpu);
|
phys_cpu = opal_paffinity_base_get_physical_processor_id(logical_cpu);
|
||||||
if (0 > phys_cpu) {
|
if (OPAL_SUCCESS != phys_cpu){
|
||||||
|
/* No processor to bind to so error out */
|
||||||
ORTE_ODLS_IF_BIND_NOT_REQD(5);
|
ORTE_ODLS_IF_BIND_NOT_REQD(5);
|
||||||
ORTE_ODLS_ERROR_OUT(phys_cpu);
|
ORTE_ODLS_ERROR_OUT(phys_cpu);
|
||||||
}
|
}
|
||||||
@ -568,7 +581,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
target_socket = opal_paffinity_base_get_physical_socket_id(logical_skt);
|
target_socket = opal_paffinity_base_get_physical_socket_id(logical_skt);
|
||||||
if (target_socket < 0) {
|
if (ORTE_ERR_NOT_SUPPORTED == OPAL_SOS_GET_ERROR_CODE(target_socket)) {
|
||||||
/* OS doesn't support providing topology information */
|
/* OS doesn't support providing topology information */
|
||||||
ORTE_ODLS_IF_BIND_NOT_REQD(6);
|
ORTE_ODLS_IF_BIND_NOT_REQD(6);
|
||||||
ORTE_ODLS_ERROR_OUT(target_socket);
|
ORTE_ODLS_ERROR_OUT(target_socket);
|
||||||
@ -588,7 +601,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
|||||||
* from when we initialized
|
* from when we initialized
|
||||||
*/
|
*/
|
||||||
target_socket = opal_paffinity_base_get_physical_socket_id(lrank % orte_odls_globals.num_sockets);
|
target_socket = opal_paffinity_base_get_physical_socket_id(lrank % orte_odls_globals.num_sockets);
|
||||||
if (target_socket < 0) {
|
if (ORTE_ERR_NOT_SUPPORTED == OPAL_SOS_GET_ERROR_CODE(target_socket)) {
|
||||||
/* OS does not support providing topology information */
|
/* OS does not support providing topology information */
|
||||||
ORTE_ODLS_IF_BIND_NOT_REQD(6);
|
ORTE_ODLS_IF_BIND_NOT_REQD(6);
|
||||||
ORTE_ODLS_ERROR_OUT(target_socket);
|
ORTE_ODLS_ERROR_OUT(target_socket);
|
||||||
@ -635,7 +648,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
|||||||
if (1 == orte_odls_globals.num_sockets) {
|
if (1 == orte_odls_globals.num_sockets) {
|
||||||
/* if we only have one socket, then just put it there */
|
/* if we only have one socket, then just put it there */
|
||||||
target_socket = opal_paffinity_base_get_physical_socket_id(0);
|
target_socket = opal_paffinity_base_get_physical_socket_id(0);
|
||||||
if (target_socket < 0) {
|
if (ORTE_ERR_NOT_SUPPORTED == OPAL_SOS_GET_ERROR_CODE(target_socket)) {
|
||||||
/* OS doesn't support providing topology information */
|
/* OS doesn't support providing topology information */
|
||||||
ORTE_ODLS_IF_BIND_NOT_REQD(6);
|
ORTE_ODLS_IF_BIND_NOT_REQD(6);
|
||||||
ORTE_ODLS_ERROR_OUT(target_socket);
|
ORTE_ODLS_ERROR_OUT(target_socket);
|
||||||
@ -647,7 +660,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
|||||||
logical_skt = logical_skt % orte_odls_globals.num_sockets;
|
logical_skt = logical_skt % orte_odls_globals.num_sockets;
|
||||||
/* now get the target physical socket */
|
/* now get the target physical socket */
|
||||||
target_socket = opal_paffinity_base_get_physical_socket_id(logical_skt);
|
target_socket = opal_paffinity_base_get_physical_socket_id(logical_skt);
|
||||||
if (target_socket < 0) {
|
if (ORTE_ERR_NOT_SUPPORTED == OPAL_SOS_GET_ERROR_CODE(target_socket)) {
|
||||||
/* OS doesn't support providing topology information */
|
/* OS doesn't support providing topology information */
|
||||||
ORTE_ODLS_IF_BIND_NOT_REQD(6);
|
ORTE_ODLS_IF_BIND_NOT_REQD(6);
|
||||||
ORTE_ODLS_ERROR_OUT(target_socket);
|
ORTE_ODLS_ERROR_OUT(target_socket);
|
||||||
|
@ -48,6 +48,7 @@
|
|||||||
#include "opal/util/show_help.h"
|
#include "opal/util/show_help.h"
|
||||||
#include "opal/util/error.h"
|
#include "opal/util/error.h"
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
#include "opal/opal_socket_errno.h"
|
#include "opal/opal_socket_errno.h"
|
||||||
#include "opal/util/if.h"
|
#include "opal/util/if.h"
|
||||||
#include "opal/util/net.h"
|
#include "opal/util/net.h"
|
||||||
@ -2032,7 +2033,7 @@ mca_oob_tcp_get_new_name(orte_process_name_t* name)
|
|||||||
MCA_OOB_TCP_HDR_HTON(&msg->msg_hdr);
|
MCA_OOB_TCP_HDR_HTON(&msg->msg_hdr);
|
||||||
rc = mca_oob_tcp_peer_send(peer, msg);
|
rc = mca_oob_tcp_peer_send(peer, msg);
|
||||||
if(rc != ORTE_SUCCESS) {
|
if(rc != ORTE_SUCCESS) {
|
||||||
if (rc != ORTE_ERR_ADDRESSEE_UNKNOWN) {
|
if (OPAL_SOS_GET_ERROR_CODE(rc) != ORTE_ERR_ADDRESSEE_UNKNOWN) {
|
||||||
MCA_OOB_TCP_MSG_RETURN(msg);
|
MCA_OOB_TCP_MSG_RETURN(msg);
|
||||||
}
|
}
|
||||||
return rc;
|
return rc;
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#include "opal/types.h"
|
#include "opal/types.h"
|
||||||
|
|
||||||
#include "opal/opal_socket_errno.h"
|
#include "opal/opal_socket_errno.h"
|
||||||
|
#include "opal/util/opal_sos.h"
|
||||||
|
|
||||||
#include "opal/class/opal_hash_table.h"
|
#include "opal/class/opal_hash_table.h"
|
||||||
#include "orte/util/name_fns.h"
|
#include "orte/util/name_fns.h"
|
||||||
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
x
Ссылка в новой задаче
Block a user