- first cut at tcp btl (working but not optimal)
- reworked btl error logging macros
This commit was SVN r6701.
Этот коммит содержится в:
родитель
ed1022afd3
Коммит
2214f0502d
@ -30,6 +30,7 @@ headers = \
|
|||||||
libmca_btl_base_la_SOURCES = \
|
libmca_btl_base_la_SOURCES = \
|
||||||
$(headers) \
|
$(headers) \
|
||||||
btl_base_close.c \
|
btl_base_close.c \
|
||||||
|
btl_base_error.c \
|
||||||
btl_base_open.c \
|
btl_base_open.c \
|
||||||
btl_base_select.c
|
btl_base_select.c
|
||||||
|
|
||||||
|
32
ompi/mca/btl/base/btl_base_error.c
Обычный файл
32
ompi/mca/btl/base/btl_base_error.c
Обычный файл
@ -0,0 +1,32 @@
|
|||||||
|
#include "btl_base_error.h"
|
||||||
|
#include <stdarg.h>
|
||||||
|
|
||||||
|
#if OMPI_ENABLE_DEBUG
|
||||||
|
int mca_btl_base_debug = 1;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * printf-style helper that writes a formatted message to stderr.
 *
 * fmt  printf-compatible format string; the variadic arguments that
 *      follow must match its conversion specifiers.
 *
 * Returns whatever vfprintf() returns for the write.  No newline is
 * appended here.
 */
int mca_btl_base_err(const char* fmt, ...)
|
||||||
|
{
|
||||||
|
va_list list;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
va_start(list, fmt);
|
||||||
|
ret = vfprintf(stderr, fmt, list);
|
||||||
|
va_end(list);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * printf-style helper that writes a formatted message to stdout.
 *
 * fmt  printf-compatible format string; the variadic arguments that
 *      follow must match its conversion specifiers.
 *
 * Returns whatever vfprintf() returns for the write.  No newline is
 * appended here.
 *
 * NOTE(review): the BTL_ERROR and BTL_DEBUG macros introduced by this
 * commit emit their prefix and message through mca_btl_base_err()
 * (stderr) but their trailing "\n" through this function (stdout), so
 * the newline lands on a different stream than the text it terminates
 * -- confirm whether that is intended.
 */
int mca_btl_base_out(const char* fmt, ...)
|
||||||
|
{
|
||||||
|
va_list list;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
va_start(list, fmt);
|
||||||
|
ret = vfprintf(stdout, fmt, list);
|
||||||
|
va_end(list);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -17,67 +17,49 @@
|
|||||||
#ifndef MCA_BTL_BASE_ERROR_H
|
#ifndef MCA_BTL_BASE_ERROR_H
|
||||||
#define MCA_BTL_BASE_ERROR_H
|
#define MCA_BTL_BASE_ERROR_H
|
||||||
|
|
||||||
|
#include "ompi_config.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
|
extern int mca_btl_base_debug;
|
||||||
# define BTL_ERROR(fmt, ...) { \
|
|
||||||
opal_output(0, "[%s:%d:%s] my_name: [%lu,%lu,%lu] " fmt "\n", __FILE__, __LINE__, __func__, \
|
|
||||||
ORTE_NAME_ARGS(orte_process_info.my_name), __VA_ARGS__); \
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
# if defined(__GNUC__) && !defined(__STDC__)
|
|
||||||
#define BTL_ERROR(fmt, args...) { \
|
|
||||||
opal_output(0, "[%s:%d:%s] my_name: [%lu,%lu,%lu]" fmt "\n", __FILE__, __LINE__, __func__,\
|
|
||||||
ORTE_NAME_ARGS(orte_process_info.my_name), ##args); \
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
static inline void BTL_ERROR(char *fmt, ... )
|
|
||||||
{
|
|
||||||
va_list list;
|
|
||||||
va_start(list, fmt);
|
|
||||||
fprintf(stderr,"[%s:%d:%s] my_name: [%lu,%lu,%lu]",
|
|
||||||
__FILE__, __LINE__, __func__,
|
|
||||||
ORTE_NAME_ARGS(orte_process_info.my_name));
|
|
||||||
|
|
||||||
vfprintf(stderr, fmt, list);
|
extern int mca_btl_base_err(const char*, ...);
|
||||||
fprintf(stderr, "\n");
|
extern int mca_btl_base_out(const char*, ...);
|
||||||
va_end(list);
|
|
||||||
}
|
|
||||||
#endif
|
#define BTL_OUTPUT(args) \
|
||||||
#endif
|
do { \
|
||||||
#if 0
|
mca_btl_base_out("[%lu,%lu,%lu][%s:%d:%s] ", \
|
||||||
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)L
|
ORTE_NAME_ARGS(orte_process_info.my_name), \
|
||||||
# define BTL_DEBUG_OUT(fmt, ...) { \
|
__FILE__, __LINE__, __func__); \
|
||||||
opal_output(0, "[%s:%d:%s] " fmt "\n", __FILE__, __LINE__, __func__, __VA_ARGS__); \
|
mca_btl_base_out args; \
|
||||||
}
|
mca_btl_base_out("\n"); \
|
||||||
#else
|
} while(0);
|
||||||
# if defined(__GNUC__) && !defined(__STDC__)
|
|
||||||
#define BTL_DEBUG_OUT(fmt, args...) { \
|
|
||||||
opal_output(0, "[%s:%d:%s] " fmt "\n", __FILE__, __LINE__, __func__, ##args); \
|
#define BTL_ERROR(args) \
|
||||||
}
|
do { \
|
||||||
#else
|
mca_btl_base_err("[%lu,%lu,%lu][%s:%d:%s] ", \
|
||||||
static inline void BTL_DEBUG_OUT(char *fmt, ... )
|
ORTE_NAME_ARGS(orte_process_info.my_name), \
|
||||||
{
|
__FILE__, __LINE__, __func__); \
|
||||||
va_list list;
|
mca_btl_base_err args; \
|
||||||
va_start(list, fmt);
|
mca_btl_base_out("\n"); \
|
||||||
fprintf(stderr, "[%s:%d:%s]", __FILE__, __LINE__, __func__, list);
|
} while(0);
|
||||||
vfprintf(stderr, fmt, list);
|
|
||||||
vfpritnf(stderr, "\n");
|
|
||||||
va_end(list);
|
#if OMPI_ENABLE_DEBUG
|
||||||
}
|
#define BTL_DEBUG(args) \
|
||||||
#endif
|
do { \
|
||||||
#endif
|
if(mca_btl_base_debug) { \
|
||||||
|
mca_btl_base_err("[%lu,%lu,%lu][%s:%d:%s] ", \
|
||||||
|
ORTE_NAME_ARGS(orte_process_info.my_name), \
|
||||||
|
__FILE__, __LINE__, __func__); \
|
||||||
|
mca_btl_base_err args; \
|
||||||
|
mca_btl_base_out("\n"); \
|
||||||
|
} \
|
||||||
|
} while(0);
|
||||||
#else
|
#else
|
||||||
#if defined(ACCEPT_C99) && __STDC_VERSION__ >= 199901L
|
#define BTL_DEBUG(args)
|
||||||
# define BTL_DEBUG_OUT(fmt, ...)
|
|
||||||
#else
|
|
||||||
# if defined(__GNUC__) && !defined(__STDC__)
|
|
||||||
#define BTL_DEBUG_OUT(fmt, args...)
|
|
||||||
#else
|
|
||||||
static inline void BTL_DEBUG_OUT(char *fmt, ... )
|
|
||||||
{
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -25,6 +25,9 @@
|
|||||||
#include "mca/btl/btl.h"
|
#include "mca/btl/btl.h"
|
||||||
#include "mca/btl/base/base.h"
|
#include "mca/btl/base/base.h"
|
||||||
|
|
||||||
|
int mca_btl_base_debug;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* mca_btl_base_descriptor_t
|
* mca_btl_base_descriptor_t
|
||||||
*/
|
*/
|
||||||
|
@ -127,7 +127,7 @@ int mca_btl_mvapi_del_procs(struct mca_btl_base_module_t* btl,
|
|||||||
struct mca_btl_base_endpoint_t ** peers)
|
struct mca_btl_base_endpoint_t ** peers)
|
||||||
{
|
{
|
||||||
/* Stub */
|
/* Stub */
|
||||||
BTL_DEBUG_OUT("Stub\n");
|
BTL_DEBUG(("Stub\n"));
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -209,7 +209,7 @@ int mca_btl_mvapi_free(
|
|||||||
} else if(frag->size == mca_btl_mvapi_component.eager_limit){
|
} else if(frag->size == mca_btl_mvapi_component.eager_limit){
|
||||||
MCA_BTL_IB_FRAG_RETURN_EAGER(btl, frag);
|
MCA_BTL_IB_FRAG_RETURN_EAGER(btl, frag);
|
||||||
} else {
|
} else {
|
||||||
BTL_ERROR("invalid descriptor");
|
BTL_ERROR(("invalid descriptor"));
|
||||||
}
|
}
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -288,13 +288,13 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_src(
|
|||||||
|
|
||||||
rc = mca_mpool_base_remove((void*) vapi_reg->base_reg.base);
|
rc = mca_mpool_base_remove((void*) vapi_reg->base_reg.base);
|
||||||
if(OMPI_SUCCESS != rc) {
|
if(OMPI_SUCCESS != rc) {
|
||||||
BTL_ERROR("error removing memory region from memory pool tree");
|
BTL_ERROR(("error removing memory region from memory pool tree"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(is_leave_pinned) {
|
if(is_leave_pinned) {
|
||||||
if(NULL == opal_list_remove_item(&mvapi_btl->reg_mru_list, (opal_list_item_t*) vapi_reg)){
|
if(NULL == opal_list_remove_item(&mvapi_btl->reg_mru_list, (opal_list_item_t*) vapi_reg)){
|
||||||
BTL_ERROR("error removing item from reg_mru_list");
|
BTL_ERROR(("error removing item from reg_mru_list"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -315,7 +315,7 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_src(
|
|||||||
|
|
||||||
|
|
||||||
if(rc != OMPI_SUCCESS) {
|
if(rc != OMPI_SUCCESS) {
|
||||||
BTL_ERROR("error inserting memory region into memory pool tree");
|
BTL_ERROR(("error inserting memory region into memory pool tree"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -329,7 +329,7 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_src(
|
|||||||
else if(is_leave_pinned) {
|
else if(is_leave_pinned) {
|
||||||
/* the current memory region is large enough and we should leave the memory pinned */
|
/* the current memory region is large enough and we should leave the memory pinned */
|
||||||
if(NULL == opal_list_remove_item(&mvapi_btl->reg_mru_list, (opal_list_item_t*) vapi_reg)) {
|
if(NULL == opal_list_remove_item(&mvapi_btl->reg_mru_list, (opal_list_item_t*) vapi_reg)) {
|
||||||
BTL_ERROR("error removing item from reg_mru_list");
|
BTL_ERROR(("error removing item from reg_mru_list"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -391,7 +391,7 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_src(
|
|||||||
opal_list_remove_first(&mvapi_btl->reg_mru_list);
|
opal_list_remove_first(&mvapi_btl->reg_mru_list);
|
||||||
|
|
||||||
if( NULL == old_reg) {
|
if( NULL == old_reg) {
|
||||||
BTL_ERROR("error removing item from reg_mru_list");
|
BTL_ERROR(("error removing item from reg_mru_list"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -399,7 +399,7 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_src(
|
|||||||
rc = mca_mpool_base_remove((void*) old_reg->base_reg.base);
|
rc = mca_mpool_base_remove((void*) old_reg->base_reg.base);
|
||||||
|
|
||||||
if(OMPI_SUCCESS != rc) {
|
if(OMPI_SUCCESS != rc) {
|
||||||
BTL_ERROR("error removing memory region from memory pool tree");
|
BTL_ERROR(("error removing memory region from memory pool tree"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -570,7 +570,7 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_dst(
|
|||||||
|
|
||||||
rc = mca_mpool_base_remove((void*) vapi_reg->base_reg.base);
|
rc = mca_mpool_base_remove((void*) vapi_reg->base_reg.base);
|
||||||
if(OMPI_SUCCESS != rc) {
|
if(OMPI_SUCCESS != rc) {
|
||||||
BTL_ERROR("error removing memory region from memory pool tree");
|
BTL_ERROR(("error removing memory region from memory pool tree"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -580,7 +580,7 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_dst(
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
if(NULL == opal_list_remove_item(&mvapi_btl->reg_mru_list, (opal_list_item_t*) vapi_reg)) {
|
if(NULL == opal_list_remove_item(&mvapi_btl->reg_mru_list, (opal_list_item_t*) vapi_reg)) {
|
||||||
BTL_ERROR("error removing item from reg_mru_list");
|
BTL_ERROR(("error removing item from reg_mru_list"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -599,7 +599,7 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_dst(
|
|||||||
(mca_mpool_base_registration_t*) vapi_reg);
|
(mca_mpool_base_registration_t*) vapi_reg);
|
||||||
|
|
||||||
if(OMPI_SUCCESS != rc) {
|
if(OMPI_SUCCESS != rc) {
|
||||||
BTL_ERROR("error inserting memory region into memory pool tree");
|
BTL_ERROR(("error inserting memory region into memory pool tree"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
OBJ_RETAIN(vapi_reg);
|
OBJ_RETAIN(vapi_reg);
|
||||||
@ -614,7 +614,7 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_dst(
|
|||||||
else if(is_leave_pinned){
|
else if(is_leave_pinned){
|
||||||
/* the current memory region is large enough and we should leave the memory pinned */
|
/* the current memory region is large enough and we should leave the memory pinned */
|
||||||
if(NULL == opal_list_remove_item(&mvapi_btl->reg_mru_list, (opal_list_item_t*) vapi_reg)) {
|
if(NULL == opal_list_remove_item(&mvapi_btl->reg_mru_list, (opal_list_item_t*) vapi_reg)) {
|
||||||
BTL_ERROR("error removing item from reg_mru_list");
|
BTL_ERROR(("error removing item from reg_mru_list"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
opal_list_append(&mvapi_btl->reg_mru_list, (opal_list_item_t*) vapi_reg);
|
opal_list_append(&mvapi_btl->reg_mru_list, (opal_list_item_t*) vapi_reg);
|
||||||
@ -639,13 +639,13 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_dst(
|
|||||||
opal_list_remove_first(&mvapi_btl->reg_mru_list);
|
opal_list_remove_first(&mvapi_btl->reg_mru_list);
|
||||||
|
|
||||||
if( NULL == old_reg) {
|
if( NULL == old_reg) {
|
||||||
BTL_ERROR("error removing item from reg_mru_list");
|
BTL_ERROR(("error removing item from reg_mru_list"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
rc = mca_mpool_base_remove((void*) old_reg->base_reg.base);
|
rc = mca_mpool_base_remove((void*) old_reg->base_reg.base);
|
||||||
if(OMPI_SUCCESS !=rc ) {
|
if(OMPI_SUCCESS !=rc ) {
|
||||||
BTL_ERROR("error removing memory region from memory pool tree");
|
BTL_ERROR(("error removing memory region from memory pool tree"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -665,7 +665,7 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_dst(
|
|||||||
(void*) (&mvapi_btl->super),
|
(void*) (&mvapi_btl->super),
|
||||||
(mca_mpool_base_registration_t*) vapi_reg);
|
(mca_mpool_base_registration_t*) vapi_reg);
|
||||||
if(OMPI_SUCCESS != rc){
|
if(OMPI_SUCCESS != rc){
|
||||||
BTL_ERROR("error inserting memory region into memory pool");
|
BTL_ERROR(("error inserting memory region into memory pool"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -822,8 +822,7 @@ static void async_event_handler(VAPI_hca_hndl_t hca_hndl,
|
|||||||
case VAPI_SEND_QUEUE_DRAINED:
|
case VAPI_SEND_QUEUE_DRAINED:
|
||||||
case VAPI_PORT_ACTIVE:
|
case VAPI_PORT_ACTIVE:
|
||||||
{
|
{
|
||||||
BTL_DEBUG_OUT("Got an asynchronous event: %s\n",
|
BTL_DEBUG(("Got an asynchronous event: %s\n", VAPI_event_record_sym(event_p->type)));
|
||||||
VAPI_event_record_sym(event_p->type));
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case VAPI_CQ_ERROR:
|
case VAPI_CQ_ERROR:
|
||||||
@ -835,14 +834,14 @@ static void async_event_handler(VAPI_hca_hndl_t hca_hndl,
|
|||||||
case VAPI_LOCAL_CATASTROPHIC_ERROR:
|
case VAPI_LOCAL_CATASTROPHIC_ERROR:
|
||||||
case VAPI_PORT_ERROR:
|
case VAPI_PORT_ERROR:
|
||||||
{
|
{
|
||||||
BTL_ERROR("Got an asynchronous event: %s (%s)",
|
BTL_ERROR(("Got an asynchronous event: %s (%s)",
|
||||||
VAPI_event_record_sym(event_p->type),
|
VAPI_event_record_sym(event_p->type),
|
||||||
VAPI_event_syndrome_sym(event_p->syndrome));
|
VAPI_event_syndrome_sym(event_p->syndrome)));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
BTL_ERROR("Warning!! Got an undefined "
|
BTL_ERROR(("Warning!! Got an undefined "
|
||||||
"asynchronous event");
|
"asynchronous event"));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -863,7 +862,7 @@ int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t *mvapi_btl)
|
|||||||
ret = VAPI_alloc_pd(mvapi_btl->nic, &mvapi_btl->ptag);
|
ret = VAPI_alloc_pd(mvapi_btl->nic, &mvapi_btl->ptag);
|
||||||
|
|
||||||
if(ret != VAPI_OK) {
|
if(ret != VAPI_OK) {
|
||||||
BTL_ERROR("error in VAPI_alloc_pd: %s", VAPI_strerror(ret));
|
BTL_ERROR(("error in VAPI_alloc_pd: %s", VAPI_strerror(ret)));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -880,7 +879,7 @@ int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t *mvapi_btl)
|
|||||||
&mvapi_btl->srq_hndl_high,
|
&mvapi_btl->srq_hndl_high,
|
||||||
&srq_attr_out);
|
&srq_attr_out);
|
||||||
if(ret != VAPI_OK) {
|
if(ret != VAPI_OK) {
|
||||||
BTL_ERROR("error in VAPI_create_srq: %s", VAPI_strerror(ret));
|
BTL_ERROR(("error in VAPI_create_srq: %s", VAPI_strerror(ret)));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
ret = VAPI_create_srq(mvapi_btl->nic,
|
ret = VAPI_create_srq(mvapi_btl->nic,
|
||||||
@ -888,7 +887,7 @@ int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t *mvapi_btl)
|
|||||||
&mvapi_btl->srq_hndl_low,
|
&mvapi_btl->srq_hndl_low,
|
||||||
&srq_attr_out);
|
&srq_attr_out);
|
||||||
if(ret != VAPI_OK) {
|
if(ret != VAPI_OK) {
|
||||||
BTL_ERROR("error in VAPI_create_srq: %s", VAPI_strerror(ret));
|
BTL_ERROR(("error in VAPI_create_srq: %s", VAPI_strerror(ret)));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -901,7 +900,7 @@ int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t *mvapi_btl)
|
|||||||
|
|
||||||
|
|
||||||
if( VAPI_OK != ret) {
|
if( VAPI_OK != ret) {
|
||||||
BTL_ERROR("error in VAPI_create_cq: %s", VAPI_strerror(ret));
|
BTL_ERROR(("error in VAPI_create_cq: %s", VAPI_strerror(ret)));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -910,13 +909,13 @@ int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t *mvapi_btl)
|
|||||||
|
|
||||||
|
|
||||||
if( VAPI_OK != ret) {
|
if( VAPI_OK != ret) {
|
||||||
BTL_ERROR("error in VAPI_create_cq: %s", VAPI_strerror(ret));
|
BTL_ERROR(("error in VAPI_create_cq: %s", VAPI_strerror(ret)));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if(cqe_cnt <= 0) {
|
if(cqe_cnt <= 0) {
|
||||||
BTL_ERROR("error creating completion queue ");
|
BTL_ERROR(("error creating completion queue "));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -924,7 +923,7 @@ int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t *mvapi_btl)
|
|||||||
async_event_handler, 0, &mvapi_btl->async_handler);
|
async_event_handler, 0, &mvapi_btl->async_handler);
|
||||||
|
|
||||||
if(VAPI_OK != ret) {
|
if(VAPI_OK != ret) {
|
||||||
BTL_ERROR("error in EVAPI_set_async_event_handler: %s", VAPI_strerror(ret));
|
BTL_ERROR(("error in EVAPI_set_async_event_handler: %s", VAPI_strerror(ret)));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -240,10 +240,10 @@ struct mca_btl_mvapi_module_t {
|
|||||||
post_srr_sub_desc_post, \
|
post_srr_sub_desc_post, \
|
||||||
&post_srr_sub_rwqe_posted); \
|
&post_srr_sub_rwqe_posted); \
|
||||||
if(VAPI_OK != post_srr_sub_frag->ret) { \
|
if(VAPI_OK != post_srr_sub_frag->ret) { \
|
||||||
BTL_ERROR("error posting receive descriptors to shared receive queue: %s",\
|
BTL_ERROR(("error posting receive descriptors to shared receive queue: %s",\
|
||||||
VAPI_strerror(post_srr_sub_frag->ret)); \
|
VAPI_strerror(post_srr_sub_frag->ret))); \
|
||||||
} else if(post_srr_sub_rwqe_posted < 1) { \
|
} else if(post_srr_sub_rwqe_posted < 1) { \
|
||||||
BTL_ERROR("error posting receive descriptors to shared receive queue, number of entries posted is %d", post_srr_sub_rwqe_posted); \
|
BTL_ERROR(("error posting receive descriptors to shared receive queue, number of entries posted is %d", post_srr_sub_rwqe_posted)); \
|
||||||
} else {\
|
} else {\
|
||||||
OPAL_THREAD_ADD32(post_srr_sub_srr_posted, post_srr_sub_cnt); \
|
OPAL_THREAD_ADD32(post_srr_sub_srr_posted, post_srr_sub_cnt); \
|
||||||
}\
|
}\
|
||||||
|
@ -255,7 +255,7 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
|
|||||||
/* Determine the number of hca's available on the host */
|
/* Determine the number of hca's available on the host */
|
||||||
vapi_ret=EVAPI_list_hcas(0, &num_hcas, NULL);
|
vapi_ret=EVAPI_list_hcas(0, &num_hcas, NULL);
|
||||||
if( VAPI_EAGAIN != vapi_ret || 0 == num_hcas ) {
|
if( VAPI_EAGAIN != vapi_ret || 0 == num_hcas ) {
|
||||||
BTL_ERROR("No hca's found on this host!");
|
BTL_ERROR(("No hca's found on this host!"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -284,14 +284,14 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
|
|||||||
for(i = 0; i < num_hcas; i++){
|
for(i = 0; i < num_hcas; i++){
|
||||||
vapi_ret = EVAPI_get_hca_hndl(hca_ids[i], &hca_hndl);
|
vapi_ret = EVAPI_get_hca_hndl(hca_ids[i], &hca_hndl);
|
||||||
if(VAPI_OK != vapi_ret) {
|
if(VAPI_OK != vapi_ret) {
|
||||||
BTL_ERROR("error getting hca handle: %s", VAPI_strerror(vapi_ret));
|
BTL_ERROR(("error getting hca handle: %s", VAPI_strerror(vapi_ret)));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
vapi_ret = VAPI_query_hca_cap(hca_hndl, &hca_vendor, &hca_cap);
|
vapi_ret = VAPI_query_hca_cap(hca_hndl, &hca_vendor, &hca_cap);
|
||||||
if(VAPI_OK != vapi_ret) {
|
if(VAPI_OK != vapi_ret) {
|
||||||
BTL_ERROR("error getting hca properties %s", VAPI_strerror(vapi_ret));
|
BTL_ERROR(("error getting hca properties %s", VAPI_strerror(vapi_ret)));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -300,7 +300,7 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
|
|||||||
for(j = 1; j <= hca_cap.phys_port_num; j++){
|
for(j = 1; j <= hca_cap.phys_port_num; j++){
|
||||||
vapi_ret = VAPI_query_hca_port_prop(hca_hndl, (IB_port_t) j, &hca_port);
|
vapi_ret = VAPI_query_hca_port_prop(hca_hndl, (IB_port_t) j, &hca_port);
|
||||||
if(VAPI_OK != vapi_ret) {
|
if(VAPI_OK != vapi_ret) {
|
||||||
BTL_ERROR("error getting hca port properties %s", VAPI_strerror(vapi_ret));
|
BTL_ERROR(("error getting hca port properties %s", VAPI_strerror(vapi_ret)));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -385,7 +385,7 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
|
|||||||
&hca_pd);
|
&hca_pd);
|
||||||
|
|
||||||
if(NULL == mvapi_btl->ib_pool) {
|
if(NULL == mvapi_btl->ib_pool) {
|
||||||
BTL_ERROR("error creating vapi memory pool! aborting mvapi btl initialization");
|
BTL_ERROR(("error creating vapi memory pool! aborting mvapi btl initialization"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
/* Initialize pool of send fragments */
|
/* Initialize pool of send fragments */
|
||||||
@ -495,16 +495,16 @@ int mca_btl_mvapi_component_progress()
|
|||||||
ret = VAPI_poll_cq(mvapi_btl->nic, mvapi_btl->cq_hndl_high, &comp);
|
ret = VAPI_poll_cq(mvapi_btl->nic, mvapi_btl->cq_hndl_high, &comp);
|
||||||
if(VAPI_OK == ret) {
|
if(VAPI_OK == ret) {
|
||||||
if(comp.status != VAPI_SUCCESS) {
|
if(comp.status != VAPI_SUCCESS) {
|
||||||
BTL_ERROR("Got error : %s, Vendor code : %d Frag : %p",
|
BTL_ERROR(("Got error : %s, Vendor code : %d Frag : %p",
|
||||||
VAPI_wc_status_sym(comp.status),
|
VAPI_wc_status_sym(comp.status),
|
||||||
comp.vendor_err_syndrome, comp.id);
|
comp.vendor_err_syndrome, comp.id));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Handle work completions */
|
/* Handle work completions */
|
||||||
switch(comp.opcode) {
|
switch(comp.opcode) {
|
||||||
case VAPI_CQE_RQ_RDMA_WITH_IMM:
|
case VAPI_CQE_RQ_RDMA_WITH_IMM:
|
||||||
BTL_ERROR("Got an RDMA with Immediate data!, not supported!");
|
BTL_ERROR(("Got an RDMA with Immediate data!, not supported!"));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
|
|
||||||
case VAPI_CQE_SQ_RDMA_WRITE:
|
case VAPI_CQE_SQ_RDMA_WRITE:
|
||||||
@ -520,7 +520,7 @@ int mca_btl_mvapi_component_progress()
|
|||||||
case VAPI_CQE_RQ_SEND_DATA:
|
case VAPI_CQE_RQ_SEND_DATA:
|
||||||
|
|
||||||
/* Process a RECV */
|
/* Process a RECV */
|
||||||
BTL_DEBUG_OUT("Got a recv completion");
|
BTL_DEBUG(("Got a recv completion"));
|
||||||
frag = (mca_btl_mvapi_frag_t*) comp.id;
|
frag = (mca_btl_mvapi_frag_t*) comp.id;
|
||||||
endpoint = (mca_btl_mvapi_endpoint_t*) frag->endpoint;
|
endpoint = (mca_btl_mvapi_endpoint_t*) frag->endpoint;
|
||||||
|
|
||||||
@ -543,7 +543,7 @@ int mca_btl_mvapi_component_progress()
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
BTL_ERROR("Unhandled work completion opcode is %d", comp.opcode);
|
BTL_ERROR(("Unhandled work completion opcode is %d", comp.opcode));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -553,16 +553,16 @@ int mca_btl_mvapi_component_progress()
|
|||||||
ret = VAPI_poll_cq(mvapi_btl->nic, mvapi_btl->cq_hndl_low, &comp);
|
ret = VAPI_poll_cq(mvapi_btl->nic, mvapi_btl->cq_hndl_low, &comp);
|
||||||
if(VAPI_OK == ret) {
|
if(VAPI_OK == ret) {
|
||||||
if(comp.status != VAPI_SUCCESS) {
|
if(comp.status != VAPI_SUCCESS) {
|
||||||
BTL_ERROR("Got error : %s, Vendor code : %d Frag : %p",
|
BTL_ERROR(("Got error : %s, Vendor code : %d Frag : %p",
|
||||||
VAPI_wc_status_sym(comp.status),
|
VAPI_wc_status_sym(comp.status),
|
||||||
comp.vendor_err_syndrome, comp.id);
|
comp.vendor_err_syndrome, comp.id));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Handle n/w completions */
|
/* Handle n/w completions */
|
||||||
switch(comp.opcode) {
|
switch(comp.opcode) {
|
||||||
case VAPI_CQE_RQ_RDMA_WITH_IMM:
|
case VAPI_CQE_RQ_RDMA_WITH_IMM:
|
||||||
BTL_ERROR("Got an RDMA with Immediate data!, not supported!");
|
BTL_ERROR(("Got an RDMA with Immediate data!, not supported!"));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
|
|
||||||
case VAPI_CQE_SQ_RDMA_WRITE:
|
case VAPI_CQE_SQ_RDMA_WRITE:
|
||||||
@ -577,7 +577,7 @@ int mca_btl_mvapi_component_progress()
|
|||||||
|
|
||||||
case VAPI_CQE_RQ_SEND_DATA:
|
case VAPI_CQE_RQ_SEND_DATA:
|
||||||
|
|
||||||
BTL_DEBUG_OUT("Got a recv completion");
|
BTL_DEBUG(("Got a recv completion"));
|
||||||
frag = (mca_btl_mvapi_frag_t*) comp.id;
|
frag = (mca_btl_mvapi_frag_t*) comp.id;
|
||||||
endpoint = (mca_btl_mvapi_endpoint_t*) frag->endpoint;
|
endpoint = (mca_btl_mvapi_endpoint_t*) frag->endpoint;
|
||||||
frag->rc=OMPI_SUCCESS;
|
frag->rc=OMPI_SUCCESS;
|
||||||
@ -599,7 +599,7 @@ int mca_btl_mvapi_component_progress()
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
BTL_ERROR("Errorneous network completion");
|
BTL_ERROR(("Errorneous network completion"));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -184,10 +184,10 @@ static int mca_btl_mvapi_endpoint_send_connect_req(mca_btl_base_endpoint_t* endp
|
|||||||
mca_btl_mvapi_endpoint_send_cb, NULL);
|
mca_btl_mvapi_endpoint_send_cb, NULL);
|
||||||
|
|
||||||
|
|
||||||
BTL_DEBUG_OUT("Sending High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
|
BTL_DEBUG(("Sending High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
|
||||||
endpoint->lcl_qp_prop_high.qp_num,
|
endpoint->lcl_qp_prop_high.qp_num,
|
||||||
endpoint->lcl_qp_prop_low.qp_num,
|
endpoint->lcl_qp_prop_low.qp_num,
|
||||||
endpoint->endpoint_btl->port.lid);
|
endpoint->endpoint_btl->port.lid));
|
||||||
|
|
||||||
if(rc < 0) {
|
if(rc < 0) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
@ -260,10 +260,10 @@ static int mca_btl_mvapi_endpoint_set_remote_info(mca_btl_base_endpoint_t* endpo
|
|||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
BTL_DEBUG_OUT("Received High Priority QP num = %d, Low Priority QP num %d, LID = %d",
|
BTL_DEBUG(("Received High Priority QP num = %d, Low Priority QP num %d, LID = %d",
|
||||||
endpoint->rem_qp_num_high,
|
endpoint->rem_qp_num_high,
|
||||||
endpoint->rem_qp_num_low,
|
endpoint->rem_qp_num_low,
|
||||||
endpoint->rem_lid);
|
endpoint->rem_lid));
|
||||||
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -293,7 +293,7 @@ static int mca_btl_mvapi_endpoint_start_connect(mca_btl_base_endpoint_t* endpoin
|
|||||||
&endpoint->lcl_qp_hndl_high,
|
&endpoint->lcl_qp_hndl_high,
|
||||||
&endpoint->lcl_qp_prop_high,
|
&endpoint->lcl_qp_prop_high,
|
||||||
VAPI_TS_RC))) {
|
VAPI_TS_RC))) {
|
||||||
BTL_ERROR("error creating queue pair, error code %d", rc);
|
BTL_ERROR(("error creating queue pair, error code %d", rc));
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -308,19 +308,19 @@ static int mca_btl_mvapi_endpoint_start_connect(mca_btl_base_endpoint_t* endpoin
|
|||||||
&endpoint->lcl_qp_prop_low,
|
&endpoint->lcl_qp_prop_low,
|
||||||
VAPI_TS_RC))) {
|
VAPI_TS_RC))) {
|
||||||
|
|
||||||
BTL_ERROR("error creating queue pair, error code %d", rc);
|
BTL_ERROR(("error creating queue pair, error code %d", rc));
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
BTL_DEBUG_OUT("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
|
BTL_DEBUG(("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
|
||||||
endpoint->lcl_qp_prop_high.qp_num,
|
endpoint->lcl_qp_prop_high.qp_num,
|
||||||
endpoint->lcl_qp_prop_low.qp_num,
|
endpoint->lcl_qp_prop_low.qp_num,
|
||||||
endpoint->endpoint_btl->port.lid);
|
endpoint->endpoint_btl->port.lid));
|
||||||
|
|
||||||
/* Send connection info over to remote endpoint */
|
/* Send connection info over to remote endpoint */
|
||||||
endpoint->endpoint_state = MCA_BTL_IB_CONNECTING;
|
endpoint->endpoint_state = MCA_BTL_IB_CONNECTING;
|
||||||
if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_send_connect_req(endpoint))) {
|
if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_send_connect_req(endpoint))) {
|
||||||
BTL_ERROR("error sending connect request, error code %d", rc);
|
BTL_ERROR(("error sending connect request, error code %d", rc));
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -344,7 +344,7 @@ static int mca_btl_mvapi_endpoint_reply_start_connect(mca_btl_mvapi_endpoint_t *
|
|||||||
&endpoint->lcl_qp_hndl_high,
|
&endpoint->lcl_qp_hndl_high,
|
||||||
&endpoint->lcl_qp_prop_high,
|
&endpoint->lcl_qp_prop_high,
|
||||||
VAPI_TS_RC))) {
|
VAPI_TS_RC))) {
|
||||||
BTL_ERROR("error creating queue pair, error code %d", rc);
|
BTL_ERROR(("error creating queue pair, error code %d", rc));
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -358,14 +358,14 @@ static int mca_btl_mvapi_endpoint_reply_start_connect(mca_btl_mvapi_endpoint_t *
|
|||||||
&endpoint->lcl_qp_hndl_low,
|
&endpoint->lcl_qp_hndl_low,
|
||||||
&endpoint->lcl_qp_prop_low,
|
&endpoint->lcl_qp_prop_low,
|
||||||
VAPI_TS_RC))) {
|
VAPI_TS_RC))) {
|
||||||
BTL_ERROR("error creating queue pair, error code %d", rc);
|
BTL_ERROR(("error creating queue pair, error code %d", rc));
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
BTL_DEBUG_OUT("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
|
BTL_DEBUG(("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
|
||||||
endpoint->lcl_qp_prop_high.qp_num,
|
endpoint->lcl_qp_prop_high.qp_num,
|
||||||
endpoint->lcl_qp_prop_low.qp_num,
|
endpoint->lcl_qp_prop_low.qp_num,
|
||||||
endpoint->endpoint_btl->port.lid);
|
endpoint->endpoint_btl->port.lid));
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -377,13 +377,13 @@ static int mca_btl_mvapi_endpoint_reply_start_connect(mca_btl_mvapi_endpoint_t *
|
|||||||
|
|
||||||
rc = mca_btl_mvapi_endpoint_connect(endpoint);
|
rc = mca_btl_mvapi_endpoint_connect(endpoint);
|
||||||
if(rc != OMPI_SUCCESS) {
|
if(rc != OMPI_SUCCESS) {
|
||||||
BTL_ERROR("error in endpoint connect error code is %d", rc);
|
BTL_ERROR(("error in endpoint connect error code is %d", rc));
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Send connection info over to remote endpoint */
|
/* Send connection info over to remote endpoint */
|
||||||
if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_send_connect_req(endpoint))) {
|
if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_send_connect_req(endpoint))) {
|
||||||
BTL_ERROR("error in endpoint send connect request error code is %d", rc);
|
BTL_ERROR(("error in endpoint send connect request error code is %d", rc));
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -446,7 +446,7 @@ static void mca_btl_mvapi_endpoint_recv(
|
|||||||
* and then reply with our QP information */
|
* and then reply with our QP information */
|
||||||
|
|
||||||
if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_reply_start_connect(ib_endpoint, buffer))) {
|
if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_reply_start_connect(ib_endpoint, buffer))) {
|
||||||
BTL_ERROR("error in endpoint reply start connect");
|
BTL_ERROR(("error in endpoint reply start connect"));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -458,7 +458,7 @@ static void mca_btl_mvapi_endpoint_recv(
|
|||||||
|
|
||||||
mca_btl_mvapi_endpoint_set_remote_info(ib_endpoint, buffer);
|
mca_btl_mvapi_endpoint_set_remote_info(ib_endpoint, buffer);
|
||||||
if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_connect(ib_endpoint))) {
|
if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_connect(ib_endpoint))) {
|
||||||
BTL_ERROR("endpoint connect error: %d", rc);
|
BTL_ERROR(("endpoint connect error: %d", rc));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -478,7 +478,7 @@ static void mca_btl_mvapi_endpoint_recv(
|
|||||||
case MCA_BTL_IB_CONNECTED :
|
case MCA_BTL_IB_CONNECTED :
|
||||||
break;
|
break;
|
||||||
default :
|
default :
|
||||||
BTL_ERROR("Invalid endpoint state %d", endpoint_state);
|
BTL_ERROR(("Invalid endpoint state %d", endpoint_state));
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
@ -519,7 +519,7 @@ int mca_btl_mvapi_endpoint_send(
|
|||||||
switch(endpoint->endpoint_state) {
|
switch(endpoint->endpoint_state) {
|
||||||
case MCA_BTL_IB_CONNECTING:
|
case MCA_BTL_IB_CONNECTING:
|
||||||
|
|
||||||
BTL_DEBUG_OUT("Queing because state is connecting");
|
BTL_DEBUG(("Queing because state is connecting"));
|
||||||
|
|
||||||
opal_list_append(&endpoint->pending_send_frags,
|
opal_list_append(&endpoint->pending_send_frags,
|
||||||
(opal_list_item_t *)frag);
|
(opal_list_item_t *)frag);
|
||||||
@ -529,7 +529,7 @@ int mca_btl_mvapi_endpoint_send(
|
|||||||
|
|
||||||
case MCA_BTL_IB_CONNECT_ACK:
|
case MCA_BTL_IB_CONNECT_ACK:
|
||||||
|
|
||||||
BTL_DEBUG_OUT("Queuing because waiting for ack");
|
BTL_DEBUG(("Queuing because waiting for ack"));
|
||||||
|
|
||||||
opal_list_append(&endpoint->pending_send_frags,
|
opal_list_append(&endpoint->pending_send_frags,
|
||||||
(opal_list_item_t *)frag);
|
(opal_list_item_t *)frag);
|
||||||
@ -539,7 +539,7 @@ int mca_btl_mvapi_endpoint_send(
|
|||||||
|
|
||||||
case MCA_BTL_IB_CLOSED:
|
case MCA_BTL_IB_CLOSED:
|
||||||
|
|
||||||
BTL_DEBUG_OUT("Connection to endpoint closed ... connecting ...");
|
BTL_DEBUG(("Connection to endpoint closed ... connecting ..."));
|
||||||
|
|
||||||
opal_list_append(&endpoint->pending_send_frags,
|
opal_list_append(&endpoint->pending_send_frags,
|
||||||
(opal_list_item_t *)frag);
|
(opal_list_item_t *)frag);
|
||||||
@ -558,10 +558,10 @@ int mca_btl_mvapi_endpoint_send(
|
|||||||
mvapi_btl = endpoint->endpoint_btl;
|
mvapi_btl = endpoint->endpoint_btl;
|
||||||
|
|
||||||
|
|
||||||
BTL_DEBUG_OUT("Send to : %d, len : %d, frag : %p",
|
BTL_DEBUG(("Send to : %d, len : %d, frag : %p",
|
||||||
endpoint->endpoint_proc->proc_guid.vpid,
|
endpoint->endpoint_proc->proc_guid.vpid,
|
||||||
frag->sg_entry.len,
|
frag->sg_entry.len,
|
||||||
frag);
|
frag));
|
||||||
|
|
||||||
rc = mca_btl_mvapi_endpoint_post_send(mvapi_btl, endpoint, frag);
|
rc = mca_btl_mvapi_endpoint_post_send(mvapi_btl, endpoint, frag);
|
||||||
|
|
||||||
@ -598,7 +598,7 @@ void mca_btl_mvapi_progress_send_frags(mca_btl_mvapi_endpoint_t* endpoint)
|
|||||||
/* We need to post this one */
|
/* We need to post this one */
|
||||||
|
|
||||||
if(OMPI_SUCCESS != mca_btl_mvapi_endpoint_post_send(mvapi_btl, endpoint, frag))
|
if(OMPI_SUCCESS != mca_btl_mvapi_endpoint_post_send(mvapi_btl, endpoint, frag))
|
||||||
BTL_ERROR("error in mca_btl_mvapi_endpoint_send");
|
BTL_ERROR(("error in mca_btl_mvapi_endpoint_send"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -703,7 +703,7 @@ int mca_btl_mvapi_endpoint_create_qp(
|
|||||||
qp_prop);
|
qp_prop);
|
||||||
|
|
||||||
if(VAPI_OK != ret) {
|
if(VAPI_OK != ret) {
|
||||||
BTL_ERROR("error creating the queue pair: %s", VAPI_strerror(ret));
|
BTL_ERROR(("error creating the queue pair: %s", VAPI_strerror(ret)));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -749,11 +749,11 @@ int mca_btl_mvapi_endpoint_qp_init_query(
|
|||||||
&qp_attr, &qp_attr_mask, &qp_cap);
|
&qp_attr, &qp_attr_mask, &qp_cap);
|
||||||
|
|
||||||
if(VAPI_OK != ret) {
|
if(VAPI_OK != ret) {
|
||||||
BTL_ERROR("Error modifying the queue pair: %s", VAPI_strerror(ret));
|
BTL_ERROR(("Error modifying the queue pair: %s", VAPI_strerror(ret)));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
BTL_DEBUG_OUT("Modified to init..Qp %d", qp_hndl);
|
BTL_DEBUG(("Modified to init..Qp %d", qp_hndl));
|
||||||
|
|
||||||
/********************** INIT --> RTR ************************/
|
/********************** INIT --> RTR ************************/
|
||||||
QP_ATTR_MASK_CLR_ALL(qp_attr_mask);
|
QP_ATTR_MASK_CLR_ALL(qp_attr_mask);
|
||||||
@ -784,11 +784,11 @@ int mca_btl_mvapi_endpoint_qp_init_query(
|
|||||||
&qp_attr, &qp_attr_mask, &qp_cap);
|
&qp_attr, &qp_attr_mask, &qp_cap);
|
||||||
|
|
||||||
if(VAPI_OK != ret) {
|
if(VAPI_OK != ret) {
|
||||||
BTL_ERROR("Error modifying the queue pair: %s", VAPI_strerror(ret));
|
BTL_ERROR(("Error modifying the queue pair: %s", VAPI_strerror(ret)));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
BTL_DEBUG_OUT("Modified to RTR..Qp %d", qp_hndl);
|
BTL_DEBUG(("Modified to RTR..Qp %d", qp_hndl));
|
||||||
|
|
||||||
/************** RTS *******************/
|
/************** RTS *******************/
|
||||||
QP_ATTR_MASK_CLR_ALL(qp_attr_mask);
|
QP_ATTR_MASK_CLR_ALL(qp_attr_mask);
|
||||||
@ -811,11 +811,11 @@ int mca_btl_mvapi_endpoint_qp_init_query(
|
|||||||
if(VAPI_OK != ret) {
|
if(VAPI_OK != ret) {
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
BTL_DEBUG_OUT("Modified to RTS..Qp %d", qp_hndl);
|
BTL_DEBUG(("Modified to RTS..Qp %d", qp_hndl));
|
||||||
|
|
||||||
ret = VAPI_query_qp(nic, qp_hndl, &qp_attr, &qp_attr_mask, &qp_init_attr );
|
ret = VAPI_query_qp(nic, qp_hndl, &qp_attr, &qp_attr_mask, &qp_init_attr );
|
||||||
if (ret != VAPI_OK) {
|
if (ret != VAPI_OK) {
|
||||||
BTL_ERROR("Error modifying the queue pair: %s", VAPI_strerror(ret));
|
BTL_ERROR(("Error modifying the queue pair: %s", VAPI_strerror(ret)));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -189,8 +189,8 @@ void mca_btl_mvapi_progress_send_frags(mca_btl_mvapi_endpoint_t*);
|
|||||||
post_rr_sub_cnt, \
|
post_rr_sub_cnt, \
|
||||||
post_rr_sub_desc_post); \
|
post_rr_sub_desc_post); \
|
||||||
if(VAPI_OK != post_rr_sub_frag->ret) { \
|
if(VAPI_OK != post_rr_sub_frag->ret) { \
|
||||||
BTL_ERROR("error posting receive descriptors: %s",\
|
BTL_ERROR(("error posting receive descriptors: %s",\
|
||||||
VAPI_strerror(post_rr_sub_frag->ret)); \
|
VAPI_strerror(post_rr_sub_frag->ret))); \
|
||||||
} else {\
|
} else {\
|
||||||
OPAL_THREAD_ADD32(post_rr_sub_rr_posted, post_rr_sub_cnt); \
|
OPAL_THREAD_ADD32(post_rr_sub_rr_posted, post_rr_sub_cnt); \
|
||||||
}\
|
}\
|
||||||
@ -227,7 +227,7 @@ void mca_btl_mvapi_progress_send_frags(mca_btl_mvapi_endpoint_t*);
|
|||||||
/* cnt, */
|
/* cnt, */
|
||||||
/* rr_desc_post); */
|
/* rr_desc_post); */
|
||||||
/* if(VAPI_OK != frag->ret) { */
|
/* if(VAPI_OK != frag->ret) { */
|
||||||
/* BTL_ERROR("error posting receive descriptors: %s", VAPI_strerror(frag->ret)); */
|
/* BTL_ERROR(("error posting receive descriptors: %s", VAPI_strerror(frag->ret))); */
|
||||||
/* return OMPI_ERROR; */
|
/* return OMPI_ERROR; */
|
||||||
/* } */
|
/* } */
|
||||||
/* OPAL_THREAD_ADD32(rr_posted, cnt); */
|
/* OPAL_THREAD_ADD32(rr_posted, cnt); */
|
||||||
|
@ -131,8 +131,7 @@ int mca_btl_openib_del_procs(struct mca_btl_base_module_t* btl,
|
|||||||
struct ompi_proc_t **procs,
|
struct ompi_proc_t **procs,
|
||||||
struct mca_btl_base_endpoint_t ** peers)
|
struct mca_btl_base_endpoint_t ** peers)
|
||||||
{
|
{
|
||||||
/* TODO */
|
BTL_DEBUG(("TODO\n"));
|
||||||
BTL_DEBUG_OUT("Stub\n");
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -215,7 +214,7 @@ int mca_btl_openib_free(
|
|||||||
} else if(frag->size == mca_btl_openib_component.eager_limit){
|
} else if(frag->size == mca_btl_openib_component.eager_limit){
|
||||||
MCA_BTL_IB_FRAG_RETURN_EAGER(btl, frag);
|
MCA_BTL_IB_FRAG_RETURN_EAGER(btl, frag);
|
||||||
} else {
|
} else {
|
||||||
BTL_ERROR("invalid descriptor");
|
BTL_ERROR(("invalid descriptor"));
|
||||||
}
|
}
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -299,13 +298,13 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
|
|||||||
|
|
||||||
rc = mca_mpool_base_remove((void*) openib_reg->base_reg.base);
|
rc = mca_mpool_base_remove((void*) openib_reg->base_reg.base);
|
||||||
if(OMPI_SUCCESS != rc) {
|
if(OMPI_SUCCESS != rc) {
|
||||||
BTL_ERROR("error removing memory region from memory pool tree");
|
BTL_ERROR(("error removing memory region from memory pool tree"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(is_leave_pinned) {
|
if(is_leave_pinned) {
|
||||||
if(NULL == opal_list_remove_item(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg)){
|
if(NULL == opal_list_remove_item(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg)){
|
||||||
BTL_ERROR("error removing item from reg_mru_list");
|
BTL_ERROR(("error removing item from reg_mru_list"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -324,7 +323,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
|
|||||||
|
|
||||||
|
|
||||||
if(rc != OMPI_SUCCESS) {
|
if(rc != OMPI_SUCCESS) {
|
||||||
BTL_ERROR("error inserting memory region into memory pool tree");
|
BTL_ERROR(("error inserting memory region into memory pool tree"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -338,7 +337,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
|
|||||||
} else if(is_leave_pinned) {
|
} else if(is_leave_pinned) {
|
||||||
/* the current memory region is large enough and we should leave the memory pinned */
|
/* the current memory region is large enough and we should leave the memory pinned */
|
||||||
if(NULL == opal_list_remove_item(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg)) {
|
if(NULL == opal_list_remove_item(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg)) {
|
||||||
BTL_ERROR("error removing item from reg_mru_list");
|
BTL_ERROR(("error removing item from reg_mru_list"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
opal_list_append(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg);
|
opal_list_append(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg);
|
||||||
@ -398,7 +397,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
|
|||||||
opal_list_remove_last(&openib_btl->reg_mru_list);
|
opal_list_remove_last(&openib_btl->reg_mru_list);
|
||||||
|
|
||||||
if( NULL == old_reg) {
|
if( NULL == old_reg) {
|
||||||
BTL_ERROR("error removing item from reg_mru_list");
|
BTL_ERROR(("error removing item from reg_mru_list"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -406,7 +405,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
|
|||||||
rc = mca_mpool_base_remove((void*) old_reg->base_reg.base);
|
rc = mca_mpool_base_remove((void*) old_reg->base_reg.base);
|
||||||
|
|
||||||
if(OMPI_SUCCESS != rc) {
|
if(OMPI_SUCCESS != rc) {
|
||||||
BTL_ERROR("error removing memory region from memory pool tree");
|
BTL_ERROR(("error removing memory region from memory pool tree"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -452,8 +451,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
|
|||||||
frag->base.des_dst = NULL;
|
frag->base.des_dst = NULL;
|
||||||
frag->base.des_dst_cnt = 0;
|
frag->base.des_dst_cnt = 0;
|
||||||
frag->openib_reg = openib_reg;
|
frag->openib_reg = openib_reg;
|
||||||
BTL_DEBUG_OUT("frag->sg_entry.lkey = %lu .addr = %llu", frag->sg_entry.lkey, frag->sg_entry.addr);
|
BTL_DEBUG(("frag->sg_entry.lkey = %lu .addr = %llu", frag->sg_entry.lkey, frag->sg_entry.addr));
|
||||||
|
|
||||||
|
|
||||||
return &frag->base;
|
return &frag->base;
|
||||||
|
|
||||||
@ -580,7 +578,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
|
|||||||
|
|
||||||
rc = mca_mpool_base_remove((void*) openib_reg->base_reg.base);
|
rc = mca_mpool_base_remove((void*) openib_reg->base_reg.base);
|
||||||
if(OMPI_SUCCESS != rc) {
|
if(OMPI_SUCCESS != rc) {
|
||||||
BTL_ERROR("error removing memory region from memory pool tree");
|
BTL_ERROR(("error removing memory region from memory pool tree"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -589,7 +587,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
|
|||||||
* pull it off the MRU list
|
* pull it off the MRU list
|
||||||
*/
|
*/
|
||||||
if(NULL == opal_list_remove_item(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg)) {
|
if(NULL == opal_list_remove_item(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg)) {
|
||||||
BTL_ERROR("error removing item from reg_mru_list");
|
BTL_ERROR(("error removing item from reg_mru_list"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -608,7 +606,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
|
|||||||
(mca_mpool_base_registration_t*) openib_reg);
|
(mca_mpool_base_registration_t*) openib_reg);
|
||||||
|
|
||||||
if(OMPI_SUCCESS != rc) {
|
if(OMPI_SUCCESS != rc) {
|
||||||
BTL_ERROR("error inserting memory region into memory pool tree");
|
BTL_ERROR(("error inserting memory region into memory pool tree"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
OBJ_RETAIN(openib_reg);
|
OBJ_RETAIN(openib_reg);
|
||||||
@ -623,7 +621,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
|
|||||||
else if(is_leave_pinned){
|
else if(is_leave_pinned){
|
||||||
/* the current memory region is large enough and we should leave the memory pinned */
|
/* the current memory region is large enough and we should leave the memory pinned */
|
||||||
if(NULL == opal_list_remove_item(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg)) {
|
if(NULL == opal_list_remove_item(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg)) {
|
||||||
BTL_ERROR("error removing item from reg_mru_list");
|
BTL_ERROR(("error removing item from reg_mru_list"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
opal_list_append(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg);
|
opal_list_append(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg);
|
||||||
@ -647,13 +645,13 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
|
|||||||
opal_list_remove_last(&openib_btl->reg_mru_list);
|
opal_list_remove_last(&openib_btl->reg_mru_list);
|
||||||
|
|
||||||
if( NULL == old_reg) {
|
if( NULL == old_reg) {
|
||||||
BTL_ERROR("error removing item from reg_mru_list");
|
BTL_ERROR(("error removing item from reg_mru_list"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
rc = mca_mpool_base_remove((void*) old_reg->base_reg.base);
|
rc = mca_mpool_base_remove((void*) old_reg->base_reg.base);
|
||||||
if(OMPI_SUCCESS !=rc ) {
|
if(OMPI_SUCCESS !=rc ) {
|
||||||
BTL_ERROR("error removing memory region from memory pool tree");
|
BTL_ERROR(("error removing memory region from memory pool tree"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -673,7 +671,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
|
|||||||
(void*) (&openib_btl->super),
|
(void*) (&openib_btl->super),
|
||||||
(mca_mpool_base_registration_t*) openib_reg);
|
(mca_mpool_base_registration_t*) openib_reg);
|
||||||
if(OMPI_SUCCESS != rc){
|
if(OMPI_SUCCESS != rc){
|
||||||
BTL_ERROR("error inserting memory region into memory pool");
|
BTL_ERROR(("error inserting memory region into memory pool"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -705,7 +703,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
|
|||||||
frag->base.des_src = NULL;
|
frag->base.des_src = NULL;
|
||||||
frag->base.des_src_cnt = 0;
|
frag->base.des_src_cnt = 0;
|
||||||
frag->openib_reg = openib_reg;
|
frag->openib_reg = openib_reg;
|
||||||
BTL_DEBUG_OUT("frag->sg_entry.lkey = %lu .addr = %llu frag->segment.seg_key.key32[0] = %lu" , frag->sg_entry.lkey, frag->sg_entry.addr, frag->segment.seg_key.key32[0]);
|
BTL_DEBUG(("frag->sg_entry.lkey = %lu .addr = %llu frag->segment.seg_key.key32[0] = %lu" , frag->sg_entry.lkey, frag->sg_entry.addr, frag->segment.seg_key.key32[0]));
|
||||||
|
|
||||||
return &frag->base;
|
return &frag->base;
|
||||||
|
|
||||||
@ -794,16 +792,16 @@ int mca_btl_openib_put( mca_btl_base_module_t* btl,
|
|||||||
frag->sg_entry.addr = (uintptr_t) frag->base.des_src->seg_addr.pval;
|
frag->sg_entry.addr = (uintptr_t) frag->base.des_src->seg_addr.pval;
|
||||||
frag->sg_entry.length = frag->base.des_src->seg_len;
|
frag->sg_entry.length = frag->base.des_src->seg_len;
|
||||||
|
|
||||||
BTL_DEBUG_OUT("frag->wr_desc.sr_desc.wr.rdma.remote_addr = %llu .rkey = %lu frag->sg_entry.addr = %llu .length = %lu"
|
BTL_DEBUG(("frag->wr_desc.sr_desc.wr.rdma.remote_addr = %llu .rkey = %lu frag->sg_entry.addr = %llu .length = %lu"
|
||||||
, frag->wr_desc.sr_desc.wr.rdma.remote_addr
|
, frag->wr_desc.sr_desc.wr.rdma.remote_addr
|
||||||
, frag->wr_desc.sr_desc.wr.rdma.rkey
|
, frag->wr_desc.sr_desc.wr.rdma.rkey
|
||||||
, frag->sg_entry.addr
|
, frag->sg_entry.addr
|
||||||
, frag->sg_entry.length);
|
, frag->sg_entry.length));
|
||||||
|
|
||||||
if(ibv_post_send(endpoint->lcl_qp_low,
|
if(ibv_post_send(endpoint->lcl_qp_low,
|
||||||
&frag->wr_desc.sr_desc,
|
&frag->wr_desc.sr_desc,
|
||||||
&bad_wr)){
|
&bad_wr)){
|
||||||
BTL_ERROR("error posting send request errno says %s", strerror(errno));
|
BTL_ERROR(("error posting send request errno says %s", strerror(errno)));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -830,9 +828,9 @@ int mca_btl_openib_module_init(mca_btl_openib_module_t *openib_btl)
|
|||||||
|
|
||||||
|
|
||||||
if(NULL == openib_btl->ib_pd) {
|
if(NULL == openib_btl->ib_pd) {
|
||||||
BTL_ERROR("error allocating pd for %s errno says %s\n",
|
BTL_ERROR(("error allocating pd for %s errno says %s\n",
|
||||||
ibv_get_device_name(openib_btl->ib_dev),
|
ibv_get_device_name(openib_btl->ib_dev),
|
||||||
strerror(errno));
|
strerror(errno)));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -840,18 +838,18 @@ int mca_btl_openib_module_init(mca_btl_openib_module_t *openib_btl)
|
|||||||
openib_btl->ib_cq_low = ibv_create_cq(ctx, mca_btl_openib_component.ib_cq_size, NULL);
|
openib_btl->ib_cq_low = ibv_create_cq(ctx, mca_btl_openib_component.ib_cq_size, NULL);
|
||||||
|
|
||||||
if(NULL == openib_btl->ib_cq_low) {
|
if(NULL == openib_btl->ib_cq_low) {
|
||||||
BTL_ERROR("error creating low priority cq for %s errno says %s\n",
|
BTL_ERROR(("error creating low priority cq for %s errno says %s\n",
|
||||||
ibv_get_device_name(openib_btl->ib_dev),
|
ibv_get_device_name(openib_btl->ib_dev),
|
||||||
strerror(errno));
|
strerror(errno)));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
openib_btl->ib_cq_high = ibv_create_cq(ctx, mca_btl_openib_component.ib_cq_size, NULL);
|
openib_btl->ib_cq_high = ibv_create_cq(ctx, mca_btl_openib_component.ib_cq_size, NULL);
|
||||||
|
|
||||||
if(NULL == openib_btl->ib_cq_high) {
|
if(NULL == openib_btl->ib_cq_high) {
|
||||||
BTL_ERROR("error creating high priority cq for %s errno says %s\n",
|
BTL_ERROR(("error creating high priority cq for %s errno says %s\n",
|
||||||
ibv_get_device_name(openib_btl->ib_dev),
|
ibv_get_device_name(openib_btl->ib_dev),
|
||||||
strerror(errno));
|
strerror(errno)));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -265,7 +265,7 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
|
|||||||
num_devs++;
|
num_devs++;
|
||||||
|
|
||||||
if(0 == num_devs) {
|
if(0 == num_devs) {
|
||||||
BTL_ERROR("No hca's found on this host!");
|
BTL_ERROR(("No hca's found on this host!"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -300,12 +300,12 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
|
|||||||
|
|
||||||
ib_dev_context = ibv_open_device(ib_dev);
|
ib_dev_context = ibv_open_device(ib_dev);
|
||||||
if(!ib_dev_context) {
|
if(!ib_dev_context) {
|
||||||
BTL_ERROR(" error obtaining device context for %s errno says %s\n", ibv_get_device_name(ib_dev), strerror(errno));
|
BTL_ERROR((" error obtaining device context for %s errno says %s\n", ibv_get_device_name(ib_dev), strerror(errno)));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(ibv_query_device(ib_dev_context, &ib_dev_attr)){
|
if(ibv_query_device(ib_dev_context, &ib_dev_attr)){
|
||||||
BTL_ERROR("error obtaining device attributes for %s errno says %s\n", ibv_get_device_name(ib_dev), strerror(errno));
|
BTL_ERROR(("error obtaining device attributes for %s errno says %s\n", ibv_get_device_name(ib_dev), strerror(errno)));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -316,8 +316,8 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
|
|||||||
struct ibv_port_attr* ib_port_attr;
|
struct ibv_port_attr* ib_port_attr;
|
||||||
ib_port_attr = (struct ibv_port_attr*) malloc(sizeof(struct ibv_port_attr));
|
ib_port_attr = (struct ibv_port_attr*) malloc(sizeof(struct ibv_port_attr));
|
||||||
if(ibv_query_port(ib_dev_context, (uint8_t) j, ib_port_attr)){
|
if(ibv_query_port(ib_dev_context, (uint8_t) j, ib_port_attr)){
|
||||||
BTL_ERROR("error getting port attributes for device %s port number %d errno says %s",
|
BTL_ERROR(("error getting port attributes for device %s port number %d errno says %s",
|
||||||
ibv_get_device_name(ib_dev), j, strerror(errno));
|
ibv_get_device_name(ib_dev), j, strerror(errno)));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -402,7 +402,7 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
|
|||||||
&mpool_resources);
|
&mpool_resources);
|
||||||
|
|
||||||
if(NULL == openib_btl->ib_pool) {
|
if(NULL == openib_btl->ib_pool) {
|
||||||
BTL_ERROR("error creating vapi memory pool! aborting openib btl initialization");
|
BTL_ERROR(("error creating vapi memory pool! aborting openib btl initialization"));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -510,27 +510,27 @@ int mca_btl_openib_component_progress()
|
|||||||
do{
|
do{
|
||||||
ne=ibv_poll_cq(openib_btl->ib_cq_high, 1, &wc );
|
ne=ibv_poll_cq(openib_btl->ib_cq_high, 1, &wc );
|
||||||
if(ne < 0 ){
|
if(ne < 0 ){
|
||||||
BTL_ERROR("error polling CQ with %d errno says %s\n", ne, strerror(errno));
|
BTL_ERROR(("error polling CQ with %d errno says %s\n", ne, strerror(errno)));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
else if(wc.status != IBV_WC_SUCCESS) {
|
else if(wc.status != IBV_WC_SUCCESS) {
|
||||||
BTL_ERROR("error polling CQ with status %d for wr_id %llu\n",
|
BTL_ERROR(("error polling CQ with status %d for wr_id %llu\n",
|
||||||
wc.status, wc.wr_id);
|
wc.status, wc.wr_id));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
else if(1 == ne) {
|
else if(1 == ne) {
|
||||||
BTL_DEBUG_OUT("completion queue event says opcode is %d\n", wc.opcode);
|
BTL_DEBUG(("completion queue event says opcode is %d\n", wc.opcode));
|
||||||
|
|
||||||
/* Handle work completions */
|
/* Handle work completions */
|
||||||
switch(wc.opcode) {
|
switch(wc.opcode) {
|
||||||
case IBV_WC_RECV_RDMA_WITH_IMM:
|
case IBV_WC_RECV_RDMA_WITH_IMM:
|
||||||
BTL_ERROR("Got an RDMA with Immediate data Not supported!");
|
BTL_ERROR(("Got an RDMA with Immediate data Not supported!"));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
|
|
||||||
case IBV_WC_RECV:
|
case IBV_WC_RECV:
|
||||||
/* Process a RECV */
|
/* Process a RECV */
|
||||||
|
|
||||||
BTL_DEBUG_OUT("Got an recv on the completion queue");
|
BTL_DEBUG(("Got an recv on the completion queue"));
|
||||||
frag = (mca_btl_openib_frag_t*) wc.wr_id;
|
frag = (mca_btl_openib_frag_t*) wc.wr_id;
|
||||||
endpoint = (mca_btl_openib_endpoint_t*) frag->endpoint;
|
endpoint = (mca_btl_openib_endpoint_t*) frag->endpoint;
|
||||||
frag->rc=OMPI_SUCCESS;
|
frag->rc=OMPI_SUCCESS;
|
||||||
@ -566,7 +566,7 @@ int mca_btl_openib_component_progress()
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
BTL_ERROR("Unhandled work completion opcode is %d", wc.opcode);
|
BTL_ERROR(("Unhandled work completion opcode is %d", wc.opcode));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -575,24 +575,24 @@ int mca_btl_openib_component_progress()
|
|||||||
|
|
||||||
ne=ibv_poll_cq(openib_btl->ib_cq_low, 1, &wc );
|
ne=ibv_poll_cq(openib_btl->ib_cq_low, 1, &wc );
|
||||||
if(ne < 0){
|
if(ne < 0){
|
||||||
BTL_ERROR("error polling CQ with %d errno says %s", ne, strerror(errno));
|
BTL_ERROR(("error polling CQ with %d errno says %s", ne, strerror(errno)));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
else if(wc.status != IBV_WC_SUCCESS) {
|
else if(wc.status != IBV_WC_SUCCESS) {
|
||||||
BTL_ERROR("error polling CQ with status %d for wr_id %llu",
|
BTL_ERROR(("error polling CQ with status %d for wr_id %llu",
|
||||||
wc.status, wc.wr_id);
|
wc.status, wc.wr_id));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
else if(1 == ne) {
|
else if(1 == ne) {
|
||||||
/* Handle n/w completions */
|
/* Handle n/w completions */
|
||||||
switch(wc.opcode) {
|
switch(wc.opcode) {
|
||||||
case IBV_WC_RECV_RDMA_WITH_IMM:
|
case IBV_WC_RECV_RDMA_WITH_IMM:
|
||||||
BTL_ERROR("Got an RDMA with Immediate data Not supported!");
|
BTL_ERROR(("Got an RDMA with Immediate data Not supported!"));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
|
|
||||||
case IBV_WC_RECV:
|
case IBV_WC_RECV:
|
||||||
/* process a recv completion (this should only occur for a send not an rdma) */
|
/* process a recv completion (this should only occur for a send not an rdma) */
|
||||||
BTL_DEBUG_OUT( "Got a recv completion");
|
BTL_DEBUG(( "Got a recv completion"));
|
||||||
frag = (mca_btl_openib_frag_t*) wc.wr_id;
|
frag = (mca_btl_openib_frag_t*) wc.wr_id;
|
||||||
endpoint = (mca_btl_openib_endpoint_t*) frag->endpoint;
|
endpoint = (mca_btl_openib_endpoint_t*) frag->endpoint;
|
||||||
frag->rc=OMPI_SUCCESS;
|
frag->rc=OMPI_SUCCESS;
|
||||||
@ -624,7 +624,7 @@ int mca_btl_openib_component_progress()
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
BTL_ERROR("Unhandled work completion opcode is %d", wc.opcode);
|
BTL_ERROR(("Unhandled work completion opcode is %d", wc.opcode));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -93,7 +93,7 @@ static inline int mca_btl_openib_endpoint_post_send(mca_btl_openib_module_t* ope
|
|||||||
if(ibv_post_send(ib_qp,
|
if(ibv_post_send(ib_qp,
|
||||||
&frag->wr_desc.sr_desc,
|
&frag->wr_desc.sr_desc,
|
||||||
&bad_wr)) {
|
&bad_wr)) {
|
||||||
BTL_ERROR("error posting send request errno says %s", strerror(errno));
|
BTL_ERROR(("error posting send request errno says %s", strerror(errno)));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
MCA_BTL_OPENIB_ENDPOINT_POST_RR_HIGH(endpoint, 1);
|
MCA_BTL_OPENIB_ENDPOINT_POST_RR_HIGH(endpoint, 1);
|
||||||
@ -206,10 +206,10 @@ static int mca_btl_openib_endpoint_send_connect_req(mca_btl_base_endpoint_t* end
|
|||||||
mca_btl_openib_endpoint_send_cb, NULL);
|
mca_btl_openib_endpoint_send_cb, NULL);
|
||||||
|
|
||||||
|
|
||||||
BTL_DEBUG_OUT("Sending High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
|
BTL_DEBUG(("Sending High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
|
||||||
endpoint->lcl_qp_high->qp_num,
|
endpoint->lcl_qp_high->qp_num,
|
||||||
endpoint->lcl_qp_low->qp_num,
|
endpoint->lcl_qp_low->qp_num,
|
||||||
endpoint->endpoint_btl->ib_port_attr->lid);
|
endpoint->endpoint_btl->ib_port_attr->lid));
|
||||||
|
|
||||||
if(rc < 0) {
|
if(rc < 0) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
@ -297,10 +297,10 @@ static int mca_btl_openib_endpoint_set_remote_info(mca_btl_base_endpoint_t* endp
|
|||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
BTL_DEBUG_OUT("Received High Priority QP num = %d, Low Priority QP num %d, LID = %d",
|
BTL_DEBUG(("Received High Priority QP num = %d, Low Priority QP num %d, LID = %d",
|
||||||
endpoint->rem_qp_num_high,
|
endpoint->rem_qp_num_high,
|
||||||
endpoint->rem_qp_num_low,
|
endpoint->rem_qp_num_low,
|
||||||
endpoint->rem_lid);
|
endpoint->rem_lid));
|
||||||
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -328,7 +328,7 @@ static int mca_btl_openib_endpoint_start_connect(mca_btl_base_endpoint_t* endpoi
|
|||||||
openib_btl->ib_cq_high,
|
openib_btl->ib_cq_high,
|
||||||
endpoint->lcl_qp_attr_high,
|
endpoint->lcl_qp_attr_high,
|
||||||
&endpoint->lcl_qp_high))) {
|
&endpoint->lcl_qp_high))) {
|
||||||
BTL_ERROR("error creating queue pair, error code %d", rc);
|
BTL_ERROR(("error creating queue pair, error code %d", rc));
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
srand48(getpid() * time(NULL));
|
srand48(getpid() * time(NULL));
|
||||||
@ -340,20 +340,20 @@ static int mca_btl_openib_endpoint_start_connect(mca_btl_base_endpoint_t* endpoi
|
|||||||
openib_btl->ib_cq_low,
|
openib_btl->ib_cq_low,
|
||||||
endpoint->lcl_qp_attr_low,
|
endpoint->lcl_qp_attr_low,
|
||||||
&endpoint->lcl_qp_low))) {
|
&endpoint->lcl_qp_low))) {
|
||||||
BTL_ERROR("error creating queue pair, error code %d", rc);
|
BTL_ERROR(("error creating queue pair, error code %d", rc));
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
endpoint->lcl_psn_low = lrand48() & 0xffffff;
|
endpoint->lcl_psn_low = lrand48() & 0xffffff;
|
||||||
|
|
||||||
BTL_DEBUG_OUT("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
|
BTL_DEBUG(("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
|
||||||
endpoint->lcl_qp_high->qp_num,
|
endpoint->lcl_qp_high->qp_num,
|
||||||
endpoint->lcl_qp_low->qp_num,
|
endpoint->lcl_qp_low->qp_num,
|
||||||
openib_btl->ib_port_attr->lid);
|
openib_btl->ib_port_attr->lid));
|
||||||
|
|
||||||
/* Send connection info over to remote endpoint */
|
/* Send connection info over to remote endpoint */
|
||||||
endpoint->endpoint_state = MCA_BTL_IB_CONNECTING;
|
endpoint->endpoint_state = MCA_BTL_IB_CONNECTING;
|
||||||
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_send_connect_req(endpoint))) {
|
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_send_connect_req(endpoint))) {
|
||||||
BTL_ERROR("error sending connect request, error code %d", rc);
|
BTL_ERROR(("error sending connect request, error code %d", rc));
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -375,7 +375,7 @@ static int mca_btl_openib_endpoint_reply_start_connect(mca_btl_openib_endpoint_t
|
|||||||
openib_btl->ib_cq_high,
|
openib_btl->ib_cq_high,
|
||||||
endpoint->lcl_qp_attr_high,
|
endpoint->lcl_qp_attr_high,
|
||||||
&endpoint->lcl_qp_high))) {
|
&endpoint->lcl_qp_high))) {
|
||||||
BTL_ERROR("error creating queue pair, error code %d", rc);
|
BTL_ERROR(("error creating queue pair, error code %d", rc));
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
srand48(getpid() * time(NULL));
|
srand48(getpid() * time(NULL));
|
||||||
@ -387,15 +387,15 @@ static int mca_btl_openib_endpoint_reply_start_connect(mca_btl_openib_endpoint_t
|
|||||||
openib_btl->ib_cq_low,
|
openib_btl->ib_cq_low,
|
||||||
endpoint->lcl_qp_attr_low,
|
endpoint->lcl_qp_attr_low,
|
||||||
&endpoint->lcl_qp_low))) {
|
&endpoint->lcl_qp_low))) {
|
||||||
BTL_ERROR("error creating queue pair, error code %d", rc);
|
BTL_ERROR(("error creating queue pair, error code %d", rc));
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
endpoint->lcl_psn_low = lrand48() & 0xffffff;
|
endpoint->lcl_psn_low = lrand48() & 0xffffff;
|
||||||
|
|
||||||
BTL_DEBUG_OUT("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
|
BTL_DEBUG(("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
|
||||||
endpoint->lcl_qp_high->qp_num,
|
endpoint->lcl_qp_high->qp_num,
|
||||||
endpoint->lcl_qp_low->qp_num,
|
endpoint->lcl_qp_low->qp_num,
|
||||||
openib_btl->ib_port_attr->lid);
|
openib_btl->ib_port_attr->lid));
|
||||||
|
|
||||||
|
|
||||||
/* Set the remote side info */
|
/* Set the remote side info */
|
||||||
@ -405,13 +405,13 @@ static int mca_btl_openib_endpoint_reply_start_connect(mca_btl_openib_endpoint_t
|
|||||||
|
|
||||||
rc = mca_btl_openib_endpoint_connect(endpoint);
|
rc = mca_btl_openib_endpoint_connect(endpoint);
|
||||||
if(rc != OMPI_SUCCESS) {
|
if(rc != OMPI_SUCCESS) {
|
||||||
BTL_ERROR("error in endpoint connect error code is %d", rc);
|
BTL_ERROR(("error in endpoint connect error code is %d", rc));
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Send connection info over to remote endpoint */
|
/* Send connection info over to remote endpoint */
|
||||||
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_send_connect_req(endpoint))) {
|
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_send_connect_req(endpoint))) {
|
||||||
BTL_ERROR("error in endpoint send connect request error code is %d", rc);
|
BTL_ERROR(("error in endpoint send connect request error code is %d", rc));
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -476,7 +476,7 @@ static void mca_btl_openib_endpoint_recv(
|
|||||||
* and then reply with our QP information */
|
* and then reply with our QP information */
|
||||||
|
|
||||||
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_reply_start_connect(ib_endpoint, buffer))) {
|
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_reply_start_connect(ib_endpoint, buffer))) {
|
||||||
BTL_ERROR("error in endpoint reply start connect");
|
BTL_ERROR(("error in endpoint reply start connect"));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -488,7 +488,7 @@ static void mca_btl_openib_endpoint_recv(
|
|||||||
|
|
||||||
mca_btl_openib_endpoint_set_remote_info(ib_endpoint, buffer);
|
mca_btl_openib_endpoint_set_remote_info(ib_endpoint, buffer);
|
||||||
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_connect(ib_endpoint))) {
|
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_connect(ib_endpoint))) {
|
||||||
BTL_ERROR("endpoint connect error: %d", rc);
|
BTL_ERROR(("endpoint connect error: %d", rc));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -508,7 +508,7 @@ static void mca_btl_openib_endpoint_recv(
|
|||||||
|
|
||||||
break;
|
break;
|
||||||
default :
|
default :
|
||||||
BTL_ERROR("Invalid endpoint state %d", endpoint_state);
|
BTL_ERROR(("Invalid endpoint state %d", endpoint_state));
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
@ -553,7 +553,7 @@ int mca_btl_openib_endpoint_send(
|
|||||||
switch(endpoint->endpoint_state) {
|
switch(endpoint->endpoint_state) {
|
||||||
case MCA_BTL_IB_CONNECTING:
|
case MCA_BTL_IB_CONNECTING:
|
||||||
|
|
||||||
BTL_DEBUG_OUT("Queing because state is connecting");
|
BTL_DEBUG(("Queing because state is connecting"));
|
||||||
|
|
||||||
opal_list_append(&endpoint->pending_send_frags,
|
opal_list_append(&endpoint->pending_send_frags,
|
||||||
(opal_list_item_t *)frag);
|
(opal_list_item_t *)frag);
|
||||||
@ -563,7 +563,7 @@ int mca_btl_openib_endpoint_send(
|
|||||||
|
|
||||||
case MCA_BTL_IB_CONNECT_ACK:
|
case MCA_BTL_IB_CONNECT_ACK:
|
||||||
|
|
||||||
BTL_DEBUG_OUT("Queuing because waiting for ack");
|
BTL_DEBUG(("Queuing because waiting for ack"));
|
||||||
|
|
||||||
opal_list_append(&endpoint->pending_send_frags,
|
opal_list_append(&endpoint->pending_send_frags,
|
||||||
(opal_list_item_t *)frag);
|
(opal_list_item_t *)frag);
|
||||||
@ -573,7 +573,7 @@ int mca_btl_openib_endpoint_send(
|
|||||||
|
|
||||||
case MCA_BTL_IB_CLOSED:
|
case MCA_BTL_IB_CLOSED:
|
||||||
|
|
||||||
BTL_DEBUG_OUT("Connection to endpoint closed ... connecting ...");
|
BTL_DEBUG(("Connection to endpoint closed ... connecting ..."));
|
||||||
opal_list_append(&endpoint->pending_send_frags,
|
opal_list_append(&endpoint->pending_send_frags,
|
||||||
(opal_list_item_t *)frag);
|
(opal_list_item_t *)frag);
|
||||||
rc = mca_btl_openib_endpoint_start_connect(endpoint);
|
rc = mca_btl_openib_endpoint_start_connect(endpoint);
|
||||||
@ -587,10 +587,10 @@ int mca_btl_openib_endpoint_send(
|
|||||||
case MCA_BTL_IB_CONNECTED:
|
case MCA_BTL_IB_CONNECTED:
|
||||||
{
|
{
|
||||||
openib_btl = endpoint->endpoint_btl;
|
openib_btl = endpoint->endpoint_btl;
|
||||||
BTL_DEBUG_OUT("Send to : %d, len : %lu, frag : %llu",
|
BTL_DEBUG(("Send to : %d, len : %lu, frag : %llu",
|
||||||
endpoint->endpoint_proc->proc_guid.vpid,
|
endpoint->endpoint_proc->proc_guid.vpid,
|
||||||
frag->sg_entry.length,
|
frag->sg_entry.length,
|
||||||
(unsigned long long) frag);
|
(unsigned long long) frag));
|
||||||
rc = mca_btl_openib_endpoint_post_send(openib_btl, endpoint, frag);
|
rc = mca_btl_openib_endpoint_post_send(openib_btl, endpoint, frag);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -628,7 +628,7 @@ void mca_btl_openib_progress_send_frags(mca_btl_openib_endpoint_t* endpoint)
|
|||||||
/* We need to post this one */
|
/* We need to post this one */
|
||||||
|
|
||||||
if(OMPI_SUCCESS != mca_btl_openib_endpoint_post_send(openib_btl, endpoint, frag))
|
if(OMPI_SUCCESS != mca_btl_openib_endpoint_post_send(openib_btl, endpoint, frag))
|
||||||
BTL_ERROR("Error posting send");
|
BTL_ERROR(("Error posting send"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -716,7 +716,7 @@ int mca_btl_openib_endpoint_create_qp(
|
|||||||
my_qp = ibv_create_qp(pd, &qp_init_attr);
|
my_qp = ibv_create_qp(pd, &qp_init_attr);
|
||||||
|
|
||||||
if(NULL == my_qp) {
|
if(NULL == my_qp) {
|
||||||
BTL_ERROR("error creating qp errno says %s", strerror(errno));
|
BTL_ERROR(("error creating qp errno says %s", strerror(errno)));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
(*qp) = my_qp;
|
(*qp) = my_qp;
|
||||||
@ -735,7 +735,7 @@ int mca_btl_openib_endpoint_create_qp(
|
|||||||
IBV_QP_PKEY_INDEX |
|
IBV_QP_PKEY_INDEX |
|
||||||
IBV_QP_PORT |
|
IBV_QP_PORT |
|
||||||
IBV_QP_ACCESS_FLAGS )) {
|
IBV_QP_ACCESS_FLAGS )) {
|
||||||
BTL_ERROR("error modifying qp to INIT errno says %s", strerror(errno));
|
BTL_ERROR(("error modifying qp to INIT errno says %s", strerror(errno)));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -781,7 +781,7 @@ int mca_btl_openib_endpoint_qp_init_query(
|
|||||||
IBV_QP_RQ_PSN |
|
IBV_QP_RQ_PSN |
|
||||||
IBV_QP_MAX_DEST_RD_ATOMIC |
|
IBV_QP_MAX_DEST_RD_ATOMIC |
|
||||||
IBV_QP_MIN_RNR_TIMER)) {
|
IBV_QP_MIN_RNR_TIMER)) {
|
||||||
BTL_ERROR("error modifing QP to RTR errno says %s", strerror(errno));
|
BTL_ERROR(("error modifing QP to RTR errno says %s", strerror(errno)));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
attr->qp_state = IBV_QPS_RTS;
|
attr->qp_state = IBV_QPS_RTS;
|
||||||
@ -797,7 +797,7 @@ int mca_btl_openib_endpoint_qp_init_query(
|
|||||||
IBV_QP_RNR_RETRY |
|
IBV_QP_RNR_RETRY |
|
||||||
IBV_QP_SQ_PSN |
|
IBV_QP_SQ_PSN |
|
||||||
IBV_QP_MAX_QP_RD_ATOMIC)) {
|
IBV_QP_MAX_QP_RD_ATOMIC)) {
|
||||||
BTL_ERROR("error modifying QP to RTS errno says %s", strerror(errno));
|
BTL_ERROR(("error modifying QP to RTS errno says %s", strerror(errno)));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
|
@ -188,7 +188,7 @@ void mca_btl_openib_progress_send_frags(mca_btl_openib_endpoint_t*);
|
|||||||
if(ibv_post_recv(post_rr_sub_qp, \
|
if(ibv_post_recv(post_rr_sub_qp, \
|
||||||
&post_rr_sub_frag->wr_desc.rr_desc, \
|
&post_rr_sub_frag->wr_desc.rr_desc, \
|
||||||
&post_rr_sub_bad_wr)) { \
|
&post_rr_sub_bad_wr)) { \
|
||||||
BTL_ERROR("error posting receive errno says %s\n", strerror(errno)); \
|
BTL_ERROR(("error posting receive errno says %s\n", strerror(errno))); \
|
||||||
return OMPI_ERROR; \
|
return OMPI_ERROR; \
|
||||||
}\
|
}\
|
||||||
}\
|
}\
|
||||||
|
56
ompi/mca/btl/tcp/Makefile.am
Обычный файл
56
ompi/mca/btl/tcp/Makefile.am
Обычный файл
@ -0,0 +1,56 @@
|
|||||||
|
#
|
||||||
|
# Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
# All rights reserved.
|
||||||
|
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
# All rights reserved.
|
||||||
|
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
# University of Stuttgart. All rights reserved.
|
||||||
|
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
# All rights reserved.
|
||||||
|
# $COPYRIGHT$
|
||||||
|
#
|
||||||
|
# Additional copyrights may follow
|
||||||
|
#
|
||||||
|
# $HEADER$
|
||||||
|
#
|
||||||
|
|
||||||
|
# Use the top-level Makefile.options
|
||||||
|
|
||||||
|
include $(top_ompi_srcdir)/config/Makefile.options
|
||||||
|
|
||||||
|
sources = \
|
||||||
|
btl_tcp.c \
|
||||||
|
btl_tcp.h \
|
||||||
|
btl_tcp_component.c \
|
||||||
|
btl_tcp_endpoint.c \
|
||||||
|
btl_tcp_endpoint.h \
|
||||||
|
btl_tcp_frag.c \
|
||||||
|
btl_tcp_frag.h \
|
||||||
|
btl_tcp_proc.c \
|
||||||
|
btl_tcp_proc.h \
|
||||||
|
btl_tcp_error.h
|
||||||
|
|
||||||
|
# Make the output library in this directory, and name it either
|
||||||
|
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||||
|
# (for static builds).
|
||||||
|
|
||||||
|
if OMPI_BUILD_btl_tcp_DSO
|
||||||
|
lib =
|
||||||
|
lib_sources =
|
||||||
|
component = mca_btl_tcp.la
|
||||||
|
component_sources = $(sources)
|
||||||
|
else
|
||||||
|
lib = libmca_btl_tcp.la
|
||||||
|
lib_sources = $(sources)
|
||||||
|
component =
|
||||||
|
component_sources =
|
||||||
|
endif
|
||||||
|
|
||||||
|
mcacomponentdir = $(libdir)/openmpi
|
||||||
|
mcacomponent_LTLIBRARIES = $(component)
|
||||||
|
mca_btl_tcp_la_SOURCES = $(component_sources)
|
||||||
|
mca_btl_tcp_la_LDFLAGS = -module -avoid-version
|
||||||
|
|
||||||
|
noinst_LTLIBRARIES = $(lib)
|
||||||
|
libmca_btl_tcp_la_SOURCES = $(lib_sources)
|
||||||
|
libmca_btl_tcp_la_LDFLAGS = -module -avoid-version
|
426
ompi/mca/btl/tcp/btl_tcp.c
Обычный файл
426
ompi/mca/btl/tcp/btl_tcp.c
Обычный файл
@ -0,0 +1,426 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
* University of Stuttgart. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
* All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ompi_config.h"
|
||||||
|
#include <string.h>
|
||||||
|
#include "util/output.h"
|
||||||
|
#include "util/if.h"
|
||||||
|
#include "mca/pml/pml.h"
|
||||||
|
#include "mca/btl/btl.h"
|
||||||
|
|
||||||
|
#include "btl_tcp.h"
|
||||||
|
#include "btl_tcp_frag.h"
|
||||||
|
#include "btl_tcp_proc.h"
|
||||||
|
#include "btl_tcp_endpoint.h"
|
||||||
|
#include "datatype/convertor.h"
|
||||||
|
#include "mca/mpool/base/base.h"
|
||||||
|
#include "mca/mpool/mpool.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Statically initialized TCP BTL module. The nested base-module
 * initializer points back at the TCP component, leaves every size limit
 * and tuning value (exclusivity, latency, bandwidth, flags) at zero and
 * fills the function table with the mca_btl_tcp_* entry points defined in
 * this file; the "get" slot is NULL.
 * NOTE(review): the zeroed limits are presumably filled in elsewhere
 * before the module is used -- confirm against the component code.
 */
mca_btl_tcp_module_t mca_btl_tcp_module = {
|
||||||
|
{
|
||||||
|
&mca_btl_tcp_component.super,
|
||||||
|
0, /* max size of first fragment */
|
||||||
|
0, /* min send fragment size */
|
||||||
|
0, /* max send fragment size */
|
||||||
|
0, /* min rdma fragment size */
|
||||||
|
0, /* max rdma fragment size */
|
||||||
|
0, /* exclusivity */
|
||||||
|
0, /* latency */
|
||||||
|
0, /* bandwidth */
|
||||||
|
0, /* flags */
|
||||||
|
mca_btl_tcp_add_procs,
|
||||||
|
mca_btl_tcp_del_procs,
|
||||||
|
mca_btl_tcp_register,
|
||||||
|
mca_btl_tcp_finalize,
|
||||||
|
mca_btl_tcp_alloc,
|
||||||
|
mca_btl_tcp_free,
|
||||||
|
mca_btl_tcp_prepare_src,
|
||||||
|
mca_btl_tcp_prepare_dst,
|
||||||
|
mca_btl_tcp_send,
|
||||||
|
mca_btl_tcp_put,
|
||||||
|
NULL /* get */
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
int mca_btl_tcp_add_procs(
|
||||||
|
struct mca_btl_base_module_t* btl,
|
||||||
|
size_t nprocs,
|
||||||
|
struct ompi_proc_t **ompi_procs,
|
||||||
|
struct mca_btl_base_endpoint_t** peers,
|
||||||
|
ompi_bitmap_t* reachable)
|
||||||
|
{
|
||||||
|
mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*)btl;
|
||||||
|
int i, rc;
|
||||||
|
|
||||||
|
for(i = 0; i < (int) nprocs; i++) {
|
||||||
|
|
||||||
|
struct ompi_proc_t* ompi_proc = ompi_procs[i];
|
||||||
|
mca_btl_tcp_proc_t* tcp_proc;
|
||||||
|
mca_btl_base_endpoint_t* tcp_endpoint;
|
||||||
|
|
||||||
|
if(NULL == (tcp_proc = mca_btl_tcp_proc_create(ompi_proc))) {
|
||||||
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check to make sure that the peer has at least as many interface
|
||||||
|
* addresses exported as we are trying to use. If not, then
|
||||||
|
* don't bind this BTL instance to the proc.
|
||||||
|
*/
|
||||||
|
|
||||||
|
OPAL_THREAD_LOCK(&tcp_proc->proc_lock);
|
||||||
|
|
||||||
|
/* The btl_proc datastructure is shared by all TCP BTL
|
||||||
|
* instances that are trying to reach this destination.
|
||||||
|
* Cache the peer instance on the btl_proc.
|
||||||
|
*/
|
||||||
|
tcp_endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t);
|
||||||
|
if(NULL == tcp_endpoint) {
|
||||||
|
OPAL_THREAD_UNLOCK(&module_proc->proc_lock);
|
||||||
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
|
}
|
||||||
|
|
||||||
|
tcp_endpoint->endpoint_btl = tcp_btl;
|
||||||
|
rc = mca_btl_tcp_proc_insert(tcp_proc, tcp_endpoint);
|
||||||
|
if(rc != OMPI_SUCCESS) {
|
||||||
|
OBJ_RELEASE(tcp_endpoint);
|
||||||
|
OPAL_THREAD_UNLOCK(&module_proc->proc_lock);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
ompi_bitmap_set_bit(reachable, i);
|
||||||
|
OPAL_THREAD_UNLOCK(&module_proc->proc_lock);
|
||||||
|
peers[i] = tcp_endpoint;
|
||||||
|
opal_list_append(&tcp_btl->tcp_endpoints, (opal_list_item_t*)tcp_endpoint);
|
||||||
|
|
||||||
|
/* we increase the count of MPI users of the event library
|
||||||
|
once per peer, so that we are used until we aren't
|
||||||
|
connected to a peer */
|
||||||
|
opal_progress_event_increment();
|
||||||
|
}
|
||||||
|
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int mca_btl_tcp_del_procs(struct mca_btl_base_module_t* btl,
|
||||||
|
size_t nprocs,
|
||||||
|
struct ompi_proc_t **procs,
|
||||||
|
struct mca_btl_base_endpoint_t ** peers)
|
||||||
|
{
|
||||||
|
/* TODO */
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Register callback function to support send/recv semantics
|
||||||
|
*/
|
||||||
|
|
||||||
|
int mca_btl_tcp_register(
|
||||||
|
struct mca_btl_base_module_t* btl,
|
||||||
|
mca_btl_base_tag_t tag,
|
||||||
|
mca_btl_base_module_recv_cb_fn_t cbfunc,
|
||||||
|
void* cbdata)
|
||||||
|
{
|
||||||
|
mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*) btl;
|
||||||
|
tcp_btl->tcp_reg[tag].cbfunc = cbfunc;
|
||||||
|
tcp_btl->tcp_reg[tag].cbdata = cbdata;
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allocate a segment.
|
||||||
|
*
|
||||||
|
* @param btl (IN) BTL module
|
||||||
|
* @param size (IN) Request segment size.
|
||||||
|
*/
|
||||||
|
|
||||||
|
mca_btl_base_descriptor_t* mca_btl_tcp_alloc(
|
||||||
|
struct mca_btl_base_module_t* btl,
|
||||||
|
size_t size)
|
||||||
|
{
|
||||||
|
mca_btl_tcp_frag_t* frag;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
if(size <= btl->btl_eager_limit){
|
||||||
|
MCA_BTL_TCP_FRAG_ALLOC_EAGER(frag, rc);
|
||||||
|
frag->segments[0].seg_len =
|
||||||
|
size <= btl->btl_eager_limit ?
|
||||||
|
size : btl->btl_eager_limit ;
|
||||||
|
} else {
|
||||||
|
MCA_BTL_TCP_FRAG_ALLOC_MAX(frag, rc);
|
||||||
|
frag->segments[0].seg_len =
|
||||||
|
size <= btl->btl_max_send_size ?
|
||||||
|
size : btl->btl_max_send_size ;
|
||||||
|
}
|
||||||
|
frag->segments[0].seg_addr.pval = frag+1;
|
||||||
|
|
||||||
|
frag->base.des_src = frag->segments;
|
||||||
|
frag->base.des_src_cnt = 1;
|
||||||
|
frag->base.des_dst = NULL;
|
||||||
|
frag->base.des_dst_cnt = 0;
|
||||||
|
frag->base.des_flags = 0;
|
||||||
|
frag->btl = (mca_btl_tcp_module_t*)btl;
|
||||||
|
return (mca_btl_base_descriptor_t*)frag;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return a segment
|
||||||
|
*/
|
||||||
|
|
||||||
|
int mca_btl_tcp_free(
|
||||||
|
struct mca_btl_base_module_t* btl,
|
||||||
|
mca_btl_base_descriptor_t* des)
|
||||||
|
{
|
||||||
|
mca_btl_tcp_frag_t* frag = (mca_btl_tcp_frag_t*)des;
|
||||||
|
if(frag->size == 0) {
|
||||||
|
MCA_BTL_TCP_FRAG_RETURN_USER(frag);
|
||||||
|
} else if(frag->size == btl->btl_eager_limit){
|
||||||
|
MCA_BTL_TCP_FRAG_RETURN_EAGER(frag);
|
||||||
|
} else if(frag->size == btl->btl_max_send_size) {
|
||||||
|
MCA_BTL_TCP_FRAG_RETURN_MAX(frag);
|
||||||
|
} else {
|
||||||
|
return OMPI_ERR_BAD_PARAM;
|
||||||
|
}
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Pack data and return a descriptor that can be
|
||||||
|
* used for send/put.
|
||||||
|
*
|
||||||
|
* @param btl (IN) BTL module
|
||||||
|
* @param peer (IN) BTL peer addressing
|
||||||
|
*/
|
||||||
|
mca_btl_base_descriptor_t* mca_btl_tcp_prepare_src(
|
||||||
|
struct mca_btl_base_module_t* btl,
|
||||||
|
struct mca_btl_base_endpoint_t* endpoint,
|
||||||
|
struct mca_mpool_base_registration_t* registration,
|
||||||
|
struct ompi_convertor_t* convertor,
|
||||||
|
size_t reserve,
|
||||||
|
size_t* size)
|
||||||
|
{
|
||||||
|
mca_btl_tcp_frag_t* frag;
|
||||||
|
struct iovec iov;
|
||||||
|
uint32_t iov_count = 1;
|
||||||
|
size_t max_data = *size;
|
||||||
|
int32_t free_after;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* if we aren't pinning the data and the requested size is less
|
||||||
|
* than the eager limit pack into a fragment from the eager pool
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (max_data+reserve <= btl->btl_eager_limit) {
|
||||||
|
MCA_BTL_TCP_FRAG_ALLOC_EAGER(frag, rc);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* otherwise pack as much data as we can into a fragment
|
||||||
|
* that is the max send size.
|
||||||
|
*/
|
||||||
|
else {
|
||||||
|
MCA_BTL_TCP_FRAG_ALLOC_MAX(frag, rc);
|
||||||
|
}
|
||||||
|
if(NULL == frag) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(ompi_convertor_need_buffers(convertor)) {
|
||||||
|
|
||||||
|
if (max_data + reserve > frag->size) {
|
||||||
|
max_data = frag->size - reserve;
|
||||||
|
}
|
||||||
|
iov.iov_len = max_data;
|
||||||
|
iov.iov_base = (void*)(((unsigned char*)(frag+1)) + reserve);
|
||||||
|
|
||||||
|
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data, &free_after);
|
||||||
|
*size = max_data;
|
||||||
|
if( rc < 0 ) {
|
||||||
|
MCA_BTL_TCP_FRAG_RETURN_EAGER(frag);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
frag->segments[0].seg_addr.pval = iov.iov_base;
|
||||||
|
frag->segments[0].seg_len = max_data + reserve;
|
||||||
|
frag->base.des_src_cnt = 1;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
iov.iov_len = max_data;
|
||||||
|
iov.iov_base = NULL;
|
||||||
|
|
||||||
|
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data, &free_after);
|
||||||
|
*size = max_data;
|
||||||
|
if( rc < 0 ) {
|
||||||
|
MCA_BTL_TCP_FRAG_RETURN_EAGER(frag);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
frag->segments[0].seg_addr.pval = frag+1;
|
||||||
|
frag->segments[0].seg_len = reserve;
|
||||||
|
frag->segments[1].seg_addr.pval = iov.iov_base;
|
||||||
|
frag->segments[1].seg_len = max_data;
|
||||||
|
frag->base.des_src_cnt = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
frag->base.des_src = frag->segments;
|
||||||
|
frag->base.des_dst = NULL;
|
||||||
|
frag->base.des_dst_cnt = 0;
|
||||||
|
frag->base.des_flags = 0;
|
||||||
|
return &frag->base;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Prepare a descriptor for send/rdma using the supplied
|
||||||
|
* convertor. If the convertor references data that is contigous,
|
||||||
|
* the descriptor may simply point to the user buffer. Otherwise,
|
||||||
|
* this routine is responsible for allocating buffer space and
|
||||||
|
* packing if required.
|
||||||
|
*
|
||||||
|
* @param btl (IN) BTL module
|
||||||
|
* @param endpoint (IN) BTL peer addressing
|
||||||
|
* @param convertor (IN) Data type convertor
|
||||||
|
* @param reserve (IN) Additional bytes requested by upper layer to precede user data
|
||||||
|
* @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT)
|
||||||
|
*/
|
||||||
|
|
||||||
|
mca_btl_base_descriptor_t* mca_btl_tcp_prepare_dst(
|
||||||
|
struct mca_btl_base_module_t* btl,
|
||||||
|
struct mca_btl_base_endpoint_t* endpoint,
|
||||||
|
struct mca_mpool_base_registration_t* registration,
|
||||||
|
struct ompi_convertor_t* convertor,
|
||||||
|
size_t reserve,
|
||||||
|
size_t* size)
|
||||||
|
{
|
||||||
|
mca_btl_tcp_frag_t* frag;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
MCA_BTL_TCP_FRAG_ALLOC_USER(frag, rc);
|
||||||
|
if(NULL == frag) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
frag->segments->seg_len = *size;
|
||||||
|
frag->segments->seg_addr.pval = convertor->pBaseBuf + convertor->bConverted;
|
||||||
|
|
||||||
|
frag->base.des_src = NULL;
|
||||||
|
frag->base.des_src_cnt = 0;
|
||||||
|
frag->base.des_dst = frag->segments;
|
||||||
|
frag->base.des_dst_cnt = 1;
|
||||||
|
frag->base.des_flags = 0;
|
||||||
|
return &frag->base;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initiate an asynchronous send.
|
||||||
|
*
|
||||||
|
* @param btl (IN) BTL module
|
||||||
|
* @param endpoint (IN) BTL addressing information
|
||||||
|
* @param descriptor (IN) Description of the data to be transfered
|
||||||
|
* @param tag (IN) The tag value used to notify the peer.
|
||||||
|
*/
|
||||||
|
|
||||||
|
int mca_btl_tcp_send(
|
||||||
|
struct mca_btl_base_module_t* btl,
|
||||||
|
struct mca_btl_base_endpoint_t* endpoint,
|
||||||
|
struct mca_btl_base_descriptor_t* descriptor,
|
||||||
|
mca_btl_base_tag_t tag)
|
||||||
|
{
|
||||||
|
mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*) btl;
|
||||||
|
mca_btl_tcp_frag_t* frag = (mca_btl_tcp_frag_t*)descriptor;
|
||||||
|
frag->btl = tcp_btl;
|
||||||
|
frag->hdr.base.tag = tag;
|
||||||
|
frag->hdr.type = MCA_BTL_TCP_HDR_TYPE_SEND;
|
||||||
|
MCA_BTL_TCP_FRAG_INIT_SRC(frag,endpoint);
|
||||||
|
return mca_btl_tcp_endpoint_send(endpoint,frag);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initiate an asynchronous put.
|
||||||
|
*
|
||||||
|
* @param btl (IN) BTL module
|
||||||
|
* @param endpoint (IN) BTL addressing information
|
||||||
|
* @param descriptor (IN) Description of the data to be transferred
|
||||||
|
*/
|
||||||
|
|
||||||
|
int mca_btl_tcp_put(
|
||||||
|
mca_btl_base_module_t* btl,
|
||||||
|
mca_btl_base_endpoint_t* endpoint,
|
||||||
|
mca_btl_base_descriptor_t* descriptor)
|
||||||
|
{
|
||||||
|
mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*) btl;
|
||||||
|
mca_btl_tcp_frag_t* frag = (mca_btl_tcp_frag_t*) descriptor;
|
||||||
|
frag->btl = tcp_btl;
|
||||||
|
frag->endpoint = endpoint;
|
||||||
|
/* TODO */
|
||||||
|
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initiate an asynchronous get.
|
||||||
|
*
|
||||||
|
* @param btl (IN) BTL module
|
||||||
|
* @param endpoint (IN) BTL addressing information
|
||||||
|
* @param descriptor (IN) Description of the data to be transferred
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
int mca_btl_tcp_get(
|
||||||
|
mca_btl_base_module_t* btl,
|
||||||
|
mca_btl_base_endpoint_t* endpoint,
|
||||||
|
mca_btl_base_descriptor_t* descriptor)
|
||||||
|
{
|
||||||
|
mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*) btl;
|
||||||
|
mca_btl_tcp_frag_t* frag = (mca_btl_tcp_frag_t*) descriptor;
|
||||||
|
frag->btl = tcp_btl;
|
||||||
|
frag->endpoint = endpoint;
|
||||||
|
/* TODO */
|
||||||
|
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Cleanup/release module resources.
|
||||||
|
*/
|
||||||
|
|
||||||
|
int mca_btl_tcp_finalize(struct mca_btl_base_module_t* btl)
|
||||||
|
{
|
||||||
|
mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*) btl;
|
||||||
|
opal_list_item_t* item;
|
||||||
|
for( item = opal_list_remove_first(&tcp_btl->tcp_endpoints);
|
||||||
|
item != NULL;
|
||||||
|
item = opal_list_remove_first(&tcp_btl->tcp_endpoints)) {
|
||||||
|
mca_btl_tcp_endpoint_t *endpoint = (mca_btl_tcp_endpoint_t*)item;
|
||||||
|
OBJ_RELEASE(endpoint);
|
||||||
|
opal_progress_event_decrement();
|
||||||
|
}
|
||||||
|
free(tcp_btl);
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
319
ompi/mca/btl/tcp/btl_tcp.h
Обычный файл
319
ompi/mca/btl/tcp/btl_tcp.h
Обычный файл
@ -0,0 +1,319 @@
|
|||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
* University of Stuttgart. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
* All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
/**
|
||||||
|
* @file
|
||||||
|
*/
|
||||||
|
#ifndef MCA_BTL_TCP_H
|
||||||
|
#define MCA_BTL_TCP_H
|
||||||
|
|
||||||
|
/* Standard system includes */
|
||||||
|
#include "ompi_config.h"
|
||||||
|
#ifdef HAVE_SYS_TYPES_H
|
||||||
|
#include <sys/types.h>
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_SYS_SOCKET_H
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_NETINET_IN_H
|
||||||
|
#include <netinet/in.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Open MPI includes */
|
||||||
|
#include "opal/event/event.h"
|
||||||
|
#include "opal/util/output.h"
|
||||||
|
#include "ompi/class/ompi_bitmap.h"
|
||||||
|
#include "ompi/class/ompi_free_list.h"
|
||||||
|
#include "ompi/mca/pml/pml.h"
|
||||||
|
#include "ompi/mca/btl/btl.h"
|
||||||
|
#include "ompi/mca/btl/base/base.h"
|
||||||
|
#include "ompi/mca/mpool/mpool.h"
|
||||||
|
#include "ompi/mca/btl/btl.h"
|
||||||
|
|
||||||
|
#define MCA_BTL_TCP_STATISTICS 0
|
||||||
|
#if defined(c_plusplus) || defined(__cplusplus)
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* TCP BTL component.
|
||||||
|
*/
|
||||||
|
|
||||||
|
struct mca_btl_tcp_component_t {
|
||||||
|
mca_btl_base_component_1_0_0_t super; /**< base BTL component */
|
||||||
|
uint32_t tcp_num_btls; /**< number of BTL modules of the TCP component (presumably the length of tcp_btls -- confirm) */
|
||||||
|
struct mca_btl_tcp_module_t **tcp_btls; /**< array of available BTL modules */
|
||||||
|
struct mca_btl_tcp_proc_t* tcp_local; /**< local proc struct */
|
||||||
|
int tcp_free_list_num; /**< initial size of free lists */
|
||||||
|
int tcp_free_list_max; /**< maximum size of free lists */
|
||||||
|
int tcp_free_list_inc; /**< number of elements to alloc when growing free lists */
|
||||||
|
opal_hash_table_t tcp_procs; /**< hash table of tcp proc structures */
|
||||||
|
opal_list_t tcp_events; /**< list of pending tcp events */
|
||||||
|
opal_mutex_t tcp_lock; /**< lock for accessing module state */
|
||||||
|
opal_event_t tcp_recv_event; /**< recv event for listen socket */
|
||||||
|
int tcp_listen_sd; /**< listen socket for incoming connection requests */
|
||||||
|
unsigned short tcp_listen_port; /**< listen port */
|
||||||
|
char* tcp_if_include; /**< comma separated list of interfaces to include */
|
||||||
|
char* tcp_if_exclude; /**< comma separated list of interfaces to exclude */
|
||||||
|
int tcp_sndbuf; /**< socket sndbuf size */
|
||||||
|
int tcp_rcvbuf; /**< socket rcvbuf size */
|
||||||
|
|
||||||
|
/* free lists of fragment descriptors: eager, max-send and user fragments */
|
||||||
|
ompi_free_list_t tcp_frag_eager;
|
||||||
|
ompi_free_list_t tcp_frag_max;
|
||||||
|
ompi_free_list_t tcp_frag_user;
|
||||||
|
};
|
||||||
|
typedef struct mca_btl_tcp_component_t mca_btl_tcp_component_t;
|
||||||
|
|
||||||
|
extern mca_btl_tcp_component_t mca_btl_tcp_component;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* BTL Module Interface
|
||||||
|
*/
|
||||||
|
struct mca_btl_tcp_module_t {
|
||||||
|
mca_btl_base_module_t super; /**< base BTL interface */
|
||||||
|
mca_btl_base_recv_reg_t tcp_reg[256]; /**< receive callback and callback data registered per tag, indexed by tag */
|
||||||
|
int tcp_ifindex; /**< BTL interface index */
|
||||||
|
struct sockaddr_in tcp_ifaddr; /**< BTL interface address */
|
||||||
|
struct sockaddr_in tcp_ifmask; /**< BTL interface netmask */
|
||||||
|
opal_list_t tcp_endpoints; /**< endpoints created for this module by add_procs and released in finalize */
|
||||||
|
#if MCA_BTL_TCP_STATISTICS
|
||||||
|
size_t tcp_bytes_sent;
|
||||||
|
size_t tcp_bytes_recv;
|
||||||
|
size_t tcp_send_handler;
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
typedef struct mca_btl_tcp_module_t mca_btl_tcp_module_t;
|
||||||
|
extern mca_btl_tcp_module_t mca_btl_tcp_module;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Register TCP component parameters with the MCA framework
|
||||||
|
*/
|
||||||
|
extern int mca_btl_tcp_component_open(void);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Any final cleanup before being unloaded.
|
||||||
|
*/
|
||||||
|
extern int mca_btl_tcp_component_close(void);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* TCP component initialization.
|
||||||
|
*
|
||||||
|
* @param num_btl_modules (OUT) Number of BTLs returned in BTL array.
|
||||||
|
* @param allow_multi_user_threads (OUT) Flag indicating whether BTL supports user threads (TRUE)
|
||||||
|
* @param have_hidden_threads (OUT) Flag indicating whether BTL uses threads (TRUE)
|
||||||
|
*/
|
||||||
|
extern mca_btl_base_module_t** mca_btl_tcp_component_init(
|
||||||
|
int *num_btl_modules,
|
||||||
|
bool allow_multi_user_threads,
|
||||||
|
bool have_hidden_threads
|
||||||
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* TCP component control.
|
||||||
|
*/
|
||||||
|
int mca_btl_tcp_component_control(
|
||||||
|
int param,
|
||||||
|
void* value,
|
||||||
|
size_t size
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* TCP component progress.
|
||||||
|
*/
|
||||||
|
extern int mca_btl_tcp_component_progress(void);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cleanup any resources held by the BTL.
|
||||||
|
*
|
||||||
|
* @param btl BTL instance.
|
||||||
|
* @return OMPI_SUCCESS or error status on failure.
|
||||||
|
*/
|
||||||
|
|
||||||
|
extern int mca_btl_tcp_finalize(
|
||||||
|
struct mca_btl_base_module_t* btl
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* PML->BTL notification of change in the process list.
|
||||||
|
*
|
||||||
|
* @param btl (IN)
|
||||||
|
* @param nprocs (IN) Number of processes
|
||||||
|
* @param procs (IN) Set of processes
|
||||||
|
* @param peers (OUT) Set of (optional) peer addressing info.
|
||||||
|
* @param reachable (IN/OUT) Set of processes that are reachable via this BTL.
|
||||||
|
* @return OMPI_SUCCESS or error status on failure.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
extern int mca_btl_tcp_add_procs(
|
||||||
|
struct mca_btl_base_module_t* btl,
|
||||||
|
size_t nprocs,
|
||||||
|
struct ompi_proc_t **procs,
|
||||||
|
struct mca_btl_base_endpoint_t** peers,
|
||||||
|
ompi_bitmap_t* reachable
|
||||||
|
);
|
||||||
|
|
||||||
|
/**
 * PML->BTL notification of change in the process list.
 *
 * @param btl (IN)     BTL instance.
 * @param nprocs (IN)  Number of processes.
 * @param procs (IN)   Set of processes.
 * @param peers (IN)   Set of peer data structures.
 * @return Status indicating if cleanup was successful.
 */
extern int mca_btl_tcp_del_procs(
    struct mca_btl_base_module_t* btl,
    size_t nprocs,
    struct ompi_proc_t **procs,
    struct mca_btl_base_endpoint_t** peers
);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initiate an asynchronous send.
|
||||||
|
*
|
||||||
|
* @param btl (IN) BTL module
|
||||||
|
* @param endpoint (IN) BTL addressing information
|
||||||
|
* @param descriptor (IN) Description of the data to be transfered
|
||||||
|
* @param tag (IN) The tag value used to notify the peer.
|
||||||
|
*/
|
||||||
|
|
||||||
|
extern int mca_btl_tcp_send(
|
||||||
|
struct mca_btl_base_module_t* btl,
|
||||||
|
struct mca_btl_base_endpoint_t* btl_peer,
|
||||||
|
struct mca_btl_base_descriptor_t* descriptor,
|
||||||
|
mca_btl_base_tag_t tag
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Initiate an asynchronous put.
 *
 * @param btl (IN)         BTL module.
 * @param btl_peer (IN)    BTL addressing information.
 * @param descriptor (IN)  Description of the data to be transferred.
 */
extern int mca_btl_tcp_put(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* btl_peer,
    struct mca_btl_base_descriptor_t* descriptor
);
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Initiate an asynchronous get.
 *
 * @param btl (IN)         BTL module.
 * @param btl_peer (IN)    BTL addressing information.
 * @param descriptor (IN)  Description of the data to be transferred.
 */
extern int mca_btl_tcp_get(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* btl_peer,
    struct mca_btl_base_descriptor_t* descriptor
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Register a callback function that is called on receipt
|
||||||
|
* of a fragment.
|
||||||
|
*
|
||||||
|
* @param btl (IN) BTL module
|
||||||
|
* @return Status indicating if registration was successful
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
extern int mca_btl_tcp_register(
|
||||||
|
struct mca_btl_base_module_t* btl,
|
||||||
|
mca_btl_base_tag_t tag,
|
||||||
|
mca_btl_base_module_recv_cb_fn_t cbfunc,
|
||||||
|
void* cbdata);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allocate a descriptor with a segment of the requested size.
|
||||||
|
* Note that the BTL layer may choose to return a smaller size
|
||||||
|
* if it cannot support the request.
|
||||||
|
*
|
||||||
|
* @param btl (IN) BTL module
|
||||||
|
* @param size (IN) Request segment size.
|
||||||
|
*/
|
||||||
|
|
||||||
|
extern mca_btl_base_descriptor_t* mca_btl_tcp_alloc(
|
||||||
|
struct mca_btl_base_module_t* btl,
|
||||||
|
size_t size);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return a segment allocated by this BTL.
|
||||||
|
*
|
||||||
|
* @param btl (IN) BTL module
|
||||||
|
* @param descriptor (IN) Allocated descriptor.
|
||||||
|
*/
|
||||||
|
|
||||||
|
extern int mca_btl_tcp_free(
|
||||||
|
struct mca_btl_base_module_t* btl,
|
||||||
|
mca_btl_base_descriptor_t* des);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Prepare a descriptor for send/rdma using the supplied
|
||||||
|
* convertor. If the convertor references data that is contigous,
|
||||||
|
* the descriptor may simply point to the user buffer. Otherwise,
|
||||||
|
* this routine is responsible for allocating buffer space and
|
||||||
|
* packing if required.
|
||||||
|
*
|
||||||
|
* @param btl (IN) BTL module
|
||||||
|
* @param endpoint (IN) BTL peer addressing
|
||||||
|
* @param convertor (IN) Data type convertor
|
||||||
|
* @param reserve (IN) Additional bytes requested by upper layer to precede user data
|
||||||
|
* @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT)
|
||||||
|
*/
|
||||||
|
|
||||||
|
mca_btl_base_descriptor_t* mca_btl_tcp_prepare_src(
|
||||||
|
struct mca_btl_base_module_t* btl,
|
||||||
|
struct mca_btl_base_endpoint_t* peer,
|
||||||
|
struct mca_mpool_base_registration_t*,
|
||||||
|
struct ompi_convertor_t* convertor,
|
||||||
|
size_t reserve,
|
||||||
|
size_t* size
|
||||||
|
);
|
||||||
|
|
||||||
|
/*
 * Same argument list as mca_btl_tcp_prepare_src().
 * NOTE(review): presumably the destination-side counterpart used to
 * describe a receive/rdma target buffer -- confirm against the definition.
 */
extern mca_btl_base_descriptor_t* mca_btl_tcp_prepare_dst(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* peer,
    struct mca_mpool_base_registration_t*,
    struct ompi_convertor_t* convertor,
    size_t reserve,
    size_t* size
);
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(c_plusplus) || defined(__cplusplus)
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif
|
44
ompi/mca/btl/tcp/btl_tcp_addr.h
Обычный файл
44
ompi/mca/btl/tcp/btl_tcp_addr.h
Обычный файл
@ -0,0 +1,44 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
* University of Stuttgart. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
* All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
/**
|
||||||
|
* @file
|
||||||
|
*/
|
||||||
|
#ifndef MCA_BTL_TCP_ADDR_H
|
||||||
|
#define MCA_BTL_TCP_ADDR_H
|
||||||
|
|
||||||
|
#ifdef HAVE_SYS_TYPES_H
|
||||||
|
#include <sys/types.h>
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_SYS_SOCKET_H
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_NETINET_IN_H
|
||||||
|
#include <netinet/in.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Addressing record published to peers so they can open a TCP
 * connection to this process. Field order is part of the published
 * layout and must not be changed.
 */
typedef struct mca_btl_tcp_addr_t {
    struct in_addr addr_inet;   /**< IPv4 address in network byte order */
    in_port_t      addr_port;   /**< listen port */
    unsigned short addr_inuse;  /**< local meaning only */
} mca_btl_tcp_addr_t;
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
630
ompi/mca/btl/tcp/btl_tcp_component.c
Обычный файл
630
ompi/mca/btl/tcp/btl_tcp_component.c
Обычный файл
@ -0,0 +1,630 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
* University of Stuttgart. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
* All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ompi_config.h"
|
||||||
|
#include "include/ompi_socket_errno.h"
|
||||||
|
#ifdef HAVE_UNISTD_H
|
||||||
|
#include <unistd.h>
|
||||||
|
#endif
|
||||||
|
#include <string.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#ifdef HAVE_SYS_TYPES_H
|
||||||
|
#include <sys/types.h>
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_SYS_SOCKET_H
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_NETINET_IN_H
|
||||||
|
#include <netinet/in.h>
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_ARPA_INET_H
|
||||||
|
#include <arpa/inet.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "include/constants.h"
|
||||||
|
#include "opal/event/event.h"
|
||||||
|
#include "opal/util/if.h"
|
||||||
|
#include "opal/util/argv.h"
|
||||||
|
#include "opal/util/output.h"
|
||||||
|
#include "orte/mca/oob/base/base.h"
|
||||||
|
#include "orte/mca/ns/ns_types.h"
|
||||||
|
#include "ompi/mca/pml/pml.h"
|
||||||
|
#include "ompi/mca/btl/btl.h"
|
||||||
|
|
||||||
|
#include "mca/base/mca_base_param.h"
|
||||||
|
#include "mca/pml/base/pml_base_module_exchange.h"
|
||||||
|
#include "mca/errmgr/errmgr.h"
|
||||||
|
#include "mca/mpool/base/base.h"
|
||||||
|
#include "mca/btl/base/btl_base_error.h"
|
||||||
|
#include "btl_tcp.h"
|
||||||
|
#include "btl_tcp_addr.h"
|
||||||
|
#include "btl_tcp_proc.h"
|
||||||
|
#include "btl_tcp_frag.h"
|
||||||
|
#include "btl_tcp_endpoint.h"
|
||||||
|
#include "mca/btl/base/base.h"
|
||||||
|
#include "datatype/convertor.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Marker macro that expands to nothing. It is placed next to socket
 * error-code checks to carry the following note:
 *
 * In windows, many of the socket functions return an EWOULDBLOCK instead of
 * things like EAGAIN, EINPROGRESS, etc. It has been verified that this will
 * not conflict with other error codes that are returned by these functions
 * under UNIX/Linux environments
 */
#define IMPORTANT_WINDOWS_COMMENT()
|
||||||
|
|
||||||
|
|
||||||
|
mca_btl_tcp_component_t mca_btl_tcp_component = {
|
||||||
|
{
|
||||||
|
/* First, the mca_base_component_t struct containing meta information
|
||||||
|
about the component itself */
|
||||||
|
|
||||||
|
{
|
||||||
|
/* Indicate that we are a pml v1.0.0 component (which also implies a
|
||||||
|
specific MCA version) */
|
||||||
|
|
||||||
|
MCA_BTL_BASE_VERSION_1_0_0,
|
||||||
|
|
||||||
|
"tcp", /* MCA component name */
|
||||||
|
1, /* MCA component major version */
|
||||||
|
0, /* MCA component minor version */
|
||||||
|
0, /* MCA component release version */
|
||||||
|
mca_btl_tcp_component_open, /* component open */
|
||||||
|
mca_btl_tcp_component_close /* component close */
|
||||||
|
},
|
||||||
|
|
||||||
|
/* Next the MCA v1.0.0 component meta data */
|
||||||
|
|
||||||
|
{
|
||||||
|
/* Whether the component is checkpointable or not */
|
||||||
|
|
||||||
|
false
|
||||||
|
},
|
||||||
|
|
||||||
|
mca_btl_tcp_component_init,
|
||||||
|
NULL,
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* utility routines for parameter registration
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Register the string parameter "btl_tcp_<param_name>" with the given
 * default and return the value found by the subsequent lookup.
 *
 * Returns NULL if the lookup does not store a value. The component
 * free()s the strings obtained this way in mca_btl_tcp_component_close().
 */
static inline char* mca_btl_tcp_param_register_string(
    const char* param_name,
    const char* default_value)
{
    /* start from NULL so a failed lookup cannot return garbage */
    char *param_value = NULL;
    int id = mca_base_param_register_string("btl", "tcp", param_name, NULL, default_value);

    mca_base_param_lookup_string(id, &param_value);
    return param_value;
}
|
||||||
|
|
||||||
|
/*
 * Register the integer parameter "btl_tcp_<param_name>" with the given
 * default and return the value found by the subsequent lookup. The
 * default is returned if the lookup does not store a value.
 */
static inline int mca_btl_tcp_param_register_int(
    const char* param_name,
    int default_value)
{
    int id = mca_base_param_register_int("btl", "tcp", param_name, NULL, default_value);
    int param_value = default_value;

    mca_base_param_lookup_int(id, &param_value);
    return param_value;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Data structure for accepting connections.
|
||||||
|
*/
|
||||||
|
|
||||||
|
struct mca_btl_tcp_event_t {
|
||||||
|
opal_list_item_t item;
|
||||||
|
opal_event_t event;
|
||||||
|
};
|
||||||
|
typedef struct mca_btl_tcp_event_t mca_btl_tcp_event_t;
|
||||||
|
|
||||||
|
/* Constructor: append the new record to the component's tcp_events list. */
static void mca_btl_tcp_event_construct(mca_btl_tcp_event_t* event)
{
    opal_list_t* pending = &mca_btl_tcp_component.tcp_events;

    OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
    opal_list_append(pending, &event->item);
    OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
}
|
||||||
|
|
||||||
|
/* Destructor: unlink the record from the component's tcp_events list. */
static void mca_btl_tcp_event_destruct(mca_btl_tcp_event_t* event)
{
    opal_list_t* pending = &mca_btl_tcp_component.tcp_events;

    OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
    opal_list_remove_item(pending, &event->item);
    OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
}
|
||||||
|
|
||||||
|
/* Class descriptor: mca_btl_tcp_event_t derives from opal_list_item_t. */
OBJ_CLASS_INSTANCE(mca_btl_tcp_event_t, opal_list_item_t,
                   mca_btl_tcp_event_construct, mca_btl_tcp_event_destruct);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* functions for receiving event callbacks
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void mca_btl_tcp_component_recv_handler(int, short, void*);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called by MCA framework to open the component, registers
|
||||||
|
* component parameters.
|
||||||
|
*/
|
||||||
|
|
||||||
|
int mca_btl_tcp_component_open(void)
|
||||||
|
{
|
||||||
|
#ifdef WIN32
|
||||||
|
WSADATA win_sock_data;
|
||||||
|
if (WSAStartup(MAKEWORD(2,2), &win_sock_data) != 0) {
|
||||||
|
BTL_ERROR(("failed to initialise windows sockets:%d", WSAGetLastError()));
|
||||||
|
return OMPI_ERROR;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* initialize state */
|
||||||
|
mca_btl_tcp_component.tcp_listen_sd = -1;
|
||||||
|
mca_btl_tcp_component.tcp_num_btls=0;
|
||||||
|
mca_btl_tcp_component.tcp_btls=NULL;
|
||||||
|
|
||||||
|
/* initialize objects */
|
||||||
|
OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_lock, opal_mutex_t);
|
||||||
|
OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_procs, opal_hash_table_t);
|
||||||
|
OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_events, opal_list_t);
|
||||||
|
OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_eager, ompi_free_list_t);
|
||||||
|
OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_max, ompi_free_list_t);
|
||||||
|
OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_user, ompi_free_list_t);
|
||||||
|
opal_hash_table_init(&mca_btl_tcp_component.tcp_procs, 256);
|
||||||
|
|
||||||
|
/* register TCP component parameters */
|
||||||
|
mca_btl_tcp_component.tcp_if_include =
|
||||||
|
mca_btl_tcp_param_register_string("if_include", "");
|
||||||
|
mca_btl_tcp_component.tcp_if_exclude =
|
||||||
|
mca_btl_tcp_param_register_string("if_exclude", "lo");
|
||||||
|
mca_btl_tcp_component.tcp_free_list_num =
|
||||||
|
mca_btl_tcp_param_register_int ("free_list_num", 8);
|
||||||
|
mca_btl_tcp_component.tcp_free_list_max =
|
||||||
|
mca_btl_tcp_param_register_int ("free_list_max", 1024);
|
||||||
|
mca_btl_tcp_component.tcp_free_list_inc =
|
||||||
|
mca_btl_tcp_param_register_int ("free_list_inc", 32);
|
||||||
|
mca_btl_tcp_component.tcp_sndbuf =
|
||||||
|
mca_btl_tcp_param_register_int ("sndbuf", 128*1024);
|
||||||
|
mca_btl_tcp_component.tcp_rcvbuf =
|
||||||
|
mca_btl_tcp_param_register_int ("rcvbuf", 128*1024);
|
||||||
|
mca_btl_tcp_module.super.btl_exclusivity =
|
||||||
|
mca_btl_tcp_param_register_int ("exclusivity", 0);
|
||||||
|
mca_btl_tcp_module.super.btl_eager_limit =
|
||||||
|
mca_btl_tcp_param_register_int ("first_frag_size", 64*1024) - sizeof(mca_btl_base_header_t);
|
||||||
|
mca_btl_tcp_module.super.btl_min_send_size =
|
||||||
|
mca_btl_tcp_param_register_int ("min_send_size", 64*1024) - sizeof(mca_btl_base_header_t);
|
||||||
|
mca_btl_tcp_module.super.btl_max_send_size =
|
||||||
|
mca_btl_tcp_param_register_int ("max_send_size", 256*1024) - sizeof(mca_btl_base_header_t);
|
||||||
|
mca_btl_tcp_module.super.btl_min_rdma_size =
|
||||||
|
mca_btl_tcp_param_register_int("min_rdma_size", 1024*1024);
|
||||||
|
mca_btl_tcp_module.super.btl_max_rdma_size =
|
||||||
|
mca_btl_tcp_param_register_int("max_rdma_size", 2*1024*1024);
|
||||||
|
mca_btl_tcp_module.super.btl_flags =
|
||||||
|
mca_btl_tcp_param_register_int("flags", 0);
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* module cleanup - sanity checking of queue lengths
|
||||||
|
*/
|
||||||
|
|
||||||
|
int mca_btl_tcp_component_close(void)
|
||||||
|
{
|
||||||
|
opal_list_item_t* item;
|
||||||
|
#ifdef WIN32
|
||||||
|
WSACleanup();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if(NULL != mca_btl_tcp_component.tcp_if_include)
|
||||||
|
free(mca_btl_tcp_component.tcp_if_include);
|
||||||
|
if(NULL != mca_btl_tcp_component.tcp_if_exclude)
|
||||||
|
free(mca_btl_tcp_component.tcp_if_exclude);
|
||||||
|
if (NULL != mca_btl_tcp_component.tcp_btls)
|
||||||
|
free(mca_btl_tcp_component.tcp_btls);
|
||||||
|
|
||||||
|
if (mca_btl_tcp_component.tcp_listen_sd >= 0) {
|
||||||
|
opal_event_del(&mca_btl_tcp_component.tcp_recv_event);
|
||||||
|
close(mca_btl_tcp_component.tcp_listen_sd);
|
||||||
|
mca_btl_tcp_component.tcp_listen_sd = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* cleanup any pending events */
|
||||||
|
OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
|
||||||
|
for(item = opal_list_remove_first(&mca_btl_tcp_component.tcp_events);
|
||||||
|
item != NULL;
|
||||||
|
item = opal_list_remove_first(&mca_btl_tcp_component.tcp_events)) {
|
||||||
|
mca_btl_tcp_event_t* event = (mca_btl_tcp_event_t*)item;
|
||||||
|
opal_event_del(&event->event);
|
||||||
|
OBJ_RELEASE(event);
|
||||||
|
}
|
||||||
|
OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
|
||||||
|
|
||||||
|
/* release resources */
|
||||||
|
OBJ_DESTRUCT(&mca_btl_tcp_component.tcp_procs);
|
||||||
|
OBJ_DESTRUCT(&mca_btl_tcp_component.tcp_events);
|
||||||
|
OBJ_DESTRUCT(&mca_btl_tcp_component.tcp_frag_eager);
|
||||||
|
OBJ_DESTRUCT(&mca_btl_tcp_component.tcp_frag_max);
|
||||||
|
OBJ_DESTRUCT(&mca_btl_tcp_component.tcp_frag_user);
|
||||||
|
OBJ_DESTRUCT(&mca_btl_tcp_component.tcp_lock);
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Create a btl instance and add to modules list.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int mca_btl_tcp_create(int if_index, const char* if_name)
|
||||||
|
{
|
||||||
|
struct mca_btl_tcp_module_t* btl = (struct mca_btl_tcp_module_t *)malloc(sizeof(mca_btl_tcp_module_t));
|
||||||
|
char param[256];
|
||||||
|
if(NULL == btl)
|
||||||
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
|
memcpy(btl, &mca_btl_tcp_module, sizeof(mca_btl_tcp_module));
|
||||||
|
OBJ_CONSTRUCT(&btl->tcp_endpoints, opal_list_t);
|
||||||
|
mca_btl_tcp_component.tcp_btls[mca_btl_tcp_component.tcp_num_btls++] = btl;
|
||||||
|
|
||||||
|
/* initialize the btl */
|
||||||
|
btl->tcp_ifindex = if_index;
|
||||||
|
#if MCA_BTL_TCP_STATISTICS
|
||||||
|
btl->tcp_bytes_recv = 0;
|
||||||
|
btl->tcp_bytes_sent = 0;
|
||||||
|
btl->tcp_send_handler = 0;
|
||||||
|
#endif
|
||||||
|
opal_ifindextoaddr(if_index, (struct sockaddr*)&btl->tcp_ifaddr, sizeof(btl->tcp_ifaddr));
|
||||||
|
opal_ifindextomask(if_index, (struct sockaddr*)&btl->tcp_ifmask, sizeof(btl->tcp_ifmask));
|
||||||
|
|
||||||
|
/* allow user to specify interface bandwidth */
|
||||||
|
sprintf(param, "bandwidth_%s", if_name);
|
||||||
|
btl->super.btl_bandwidth = mca_btl_tcp_param_register_int(param, 0);
|
||||||
|
|
||||||
|
/* allow user to override/specify latency ranking */
|
||||||
|
sprintf(param, "latency_%s", if_name);
|
||||||
|
btl->super.btl_latency = mca_btl_tcp_param_register_int(param, 0);
|
||||||
|
|
||||||
|
#if 0 && OMPI_ENABLE_DEBUG
|
||||||
|
BTL_OUTPUT(("interface: %s bandwidth %d latency %d",
|
||||||
|
if_name, btl->super.btl_bandwidth, btl->super.btl_latency));
|
||||||
|
#endif
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Create a TCP BTL instance for either:
|
||||||
|
* (1) all interfaces specified by the user
|
||||||
|
* (2) all available interfaces
|
||||||
|
* (3) all available interfaces except for those excluded by the user
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int mca_btl_tcp_component_create_instances(void)
|
||||||
|
{
|
||||||
|
int if_count = opal_ifcount();
|
||||||
|
int if_index;
|
||||||
|
char **include;
|
||||||
|
char **exclude;
|
||||||
|
char **argv;
|
||||||
|
|
||||||
|
if(if_count <= 0)
|
||||||
|
return OMPI_ERROR;
|
||||||
|
|
||||||
|
/* allocate memory for btls */
|
||||||
|
mca_btl_tcp_component.tcp_btls = (mca_btl_tcp_module_t **)malloc(if_count * sizeof(mca_btl_tcp_module_t*));
|
||||||
|
if(NULL == mca_btl_tcp_component.tcp_btls)
|
||||||
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
|
|
||||||
|
/* if the user specified an interface list - use these exclusively */
|
||||||
|
argv = include = opal_argv_split(mca_btl_tcp_component.tcp_if_include,',');
|
||||||
|
while(argv && *argv) {
|
||||||
|
char* if_name = *argv;
|
||||||
|
int if_index = opal_ifnametoindex(if_name);
|
||||||
|
if(if_index < 0) {
|
||||||
|
BTL_ERROR(("invalid interface \"%s\"", if_name));
|
||||||
|
} else {
|
||||||
|
mca_btl_tcp_create(if_index, if_name);
|
||||||
|
}
|
||||||
|
argv++;
|
||||||
|
}
|
||||||
|
opal_argv_free(include);
|
||||||
|
if(mca_btl_tcp_component.tcp_num_btls)
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
|
||||||
|
/* if the interface list was not specified by the user, create
|
||||||
|
* a BTL for each interface that was not excluded.
|
||||||
|
*/
|
||||||
|
exclude = opal_argv_split(mca_btl_tcp_component.tcp_if_exclude,',');
|
||||||
|
for(if_index = opal_ifbegin(); if_index >= 0; if_index = opal_ifnext(if_index)) {
|
||||||
|
char if_name[32];
|
||||||
|
opal_ifindextoname(if_index, if_name, sizeof(if_name));
|
||||||
|
|
||||||
|
/* check to see if this interface exists in the exclude list */
|
||||||
|
if(opal_ifcount() > 1) {
|
||||||
|
argv = exclude;
|
||||||
|
while(argv && *argv) {
|
||||||
|
if(strncmp(*argv,if_name,strlen(*argv)) == 0)
|
||||||
|
break;
|
||||||
|
argv++;
|
||||||
|
}
|
||||||
|
/* if this interface was not found in the excluded list - create a BTL */
|
||||||
|
if(argv == 0 || *argv == 0) {
|
||||||
|
mca_btl_tcp_create(if_index, if_name);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
mca_btl_tcp_create(if_index, if_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
opal_argv_free(exclude);
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Create a listen socket and bind to all interfaces
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int mca_btl_tcp_component_create_listen(void)
|
||||||
|
{
|
||||||
|
int flags;
|
||||||
|
struct sockaddr_in inaddr;
|
||||||
|
ompi_socklen_t addrlen;
|
||||||
|
|
||||||
|
/* create a listen socket for incoming connections */
|
||||||
|
mca_btl_tcp_component.tcp_listen_sd = socket(AF_INET, SOCK_STREAM, 0);
|
||||||
|
if(mca_btl_tcp_component.tcp_listen_sd < 0) {
|
||||||
|
BTL_ERROR(("socket() failed with errno=%d", ompi_socket_errno));
|
||||||
|
return OMPI_ERROR;
|
||||||
|
}
|
||||||
|
mca_btl_tcp_set_socket_options(mca_btl_tcp_component.tcp_listen_sd);
|
||||||
|
|
||||||
|
/* bind to all addresses and dynamically assigned port */
|
||||||
|
memset(&inaddr, 0, sizeof(inaddr));
|
||||||
|
inaddr.sin_family = AF_INET;
|
||||||
|
inaddr.sin_addr.s_addr = INADDR_ANY;
|
||||||
|
inaddr.sin_port = 0;
|
||||||
|
|
||||||
|
if(bind(mca_btl_tcp_component.tcp_listen_sd, (struct sockaddr*)&inaddr, sizeof(inaddr)) < 0) {
|
||||||
|
BTL_ERROR(("bind() failed with errno=%d", ompi_socket_errno));
|
||||||
|
return OMPI_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* resolve system assignend port */
|
||||||
|
addrlen = sizeof(struct sockaddr_in);
|
||||||
|
if(getsockname(mca_btl_tcp_component.tcp_listen_sd, (struct sockaddr*)&inaddr, &addrlen) < 0) {
|
||||||
|
BTL_ERROR(("getsockname() failed with errno=%d", ompi_socket_errno));
|
||||||
|
return OMPI_ERROR;
|
||||||
|
}
|
||||||
|
mca_btl_tcp_component.tcp_listen_port = inaddr.sin_port;
|
||||||
|
|
||||||
|
/* setup listen backlog to maximum allowed by kernel */
|
||||||
|
if(listen(mca_btl_tcp_component.tcp_listen_sd, SOMAXCONN) < 0) {
|
||||||
|
BTL_ERROR(("listen() failed with errno=%d", ompi_socket_errno));
|
||||||
|
return OMPI_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* set socket up to be non-blocking, otherwise accept could block */
|
||||||
|
if((flags = fcntl(mca_btl_tcp_component.tcp_listen_sd, F_GETFL, 0)) < 0) {
|
||||||
|
BTL_ERROR(("fcntl(F_GETFL) failed with errno=%d", ompi_socket_errno));
|
||||||
|
return OMPI_ERROR;
|
||||||
|
} else {
|
||||||
|
flags |= O_NONBLOCK;
|
||||||
|
if(fcntl(mca_btl_tcp_component.tcp_listen_sd, F_SETFL, flags) < 0) {
|
||||||
|
BTL_ERROR(("fcntl(F_SETFL) failed with errno=%d", ompi_socket_errno));
|
||||||
|
return OMPI_ERROR;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* register listen port */
|
||||||
|
opal_event_set(
|
||||||
|
&mca_btl_tcp_component.tcp_recv_event,
|
||||||
|
mca_btl_tcp_component.tcp_listen_sd,
|
||||||
|
OPAL_EV_READ|OPAL_EV_PERSIST,
|
||||||
|
mca_btl_tcp_component_recv_handler,
|
||||||
|
0);
|
||||||
|
opal_event_add(&mca_btl_tcp_component.tcp_recv_event,0);
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Register TCP module addressing information. The MCA framework
|
||||||
|
* will make this available to all peers.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int mca_btl_tcp_component_exchange(void)
|
||||||
|
{
|
||||||
|
int rc=0;
|
||||||
|
size_t i=0;
|
||||||
|
size_t size = mca_btl_tcp_component.tcp_num_btls * sizeof(mca_btl_tcp_addr_t);
|
||||||
|
if(mca_btl_tcp_component.tcp_num_btls != 0) {
|
||||||
|
mca_btl_tcp_addr_t *addrs = (mca_btl_tcp_addr_t *)malloc(size);
|
||||||
|
for(i=0; i<mca_btl_tcp_component.tcp_num_btls; i++) {
|
||||||
|
struct mca_btl_tcp_module_t* btl = mca_btl_tcp_component.tcp_btls[i];
|
||||||
|
addrs[i].addr_inet = btl->tcp_ifaddr.sin_addr;
|
||||||
|
addrs[i].addr_port = mca_btl_tcp_component.tcp_listen_port;
|
||||||
|
addrs[i].addr_inuse = 0;
|
||||||
|
}
|
||||||
|
rc = mca_base_modex_send(&mca_btl_tcp_component.super.btl_version, addrs, size);
|
||||||
|
free(addrs);
|
||||||
|
}
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* TCP module initialization:
|
||||||
|
* (1) read interface list from kernel and compare against module parameters
|
||||||
|
* then create a BTL instance for selected interfaces
|
||||||
|
* (2) setup TCP listen socket for incoming connection attempts
|
||||||
|
* (3) register BTL parameters with the MCA
|
||||||
|
*/
|
||||||
|
mca_btl_base_module_t** mca_btl_tcp_component_init(int *num_btl_modules,
|
||||||
|
bool enable_progress_threads,
|
||||||
|
bool enable_mpi_threads)
|
||||||
|
{
|
||||||
|
mca_btl_base_module_t **btls;
|
||||||
|
*num_btl_modules = 0;
|
||||||
|
|
||||||
|
/* initialize free lists */
|
||||||
|
ompi_free_list_init( &mca_btl_tcp_component.tcp_frag_eager,
|
||||||
|
sizeof (mca_btl_tcp_frag_eager_t) + mca_btl_tcp_module.super.btl_eager_limit,
|
||||||
|
OBJ_CLASS (mca_btl_tcp_frag_eager_t),
|
||||||
|
mca_btl_tcp_component.tcp_free_list_num,
|
||||||
|
mca_btl_tcp_component.tcp_free_list_max,
|
||||||
|
mca_btl_tcp_component.tcp_free_list_inc,
|
||||||
|
NULL );
|
||||||
|
|
||||||
|
ompi_free_list_init( &mca_btl_tcp_component.tcp_frag_max,
|
||||||
|
sizeof (mca_btl_tcp_frag_max_t) + mca_btl_tcp_module.super.btl_max_send_size,
|
||||||
|
OBJ_CLASS (mca_btl_tcp_frag_max_t),
|
||||||
|
mca_btl_tcp_component.tcp_free_list_num,
|
||||||
|
mca_btl_tcp_component.tcp_free_list_max,
|
||||||
|
mca_btl_tcp_component.tcp_free_list_inc,
|
||||||
|
NULL );
|
||||||
|
|
||||||
|
ompi_free_list_init( &mca_btl_tcp_component.tcp_frag_user,
|
||||||
|
sizeof (mca_btl_tcp_frag_user_t),
|
||||||
|
OBJ_CLASS (mca_btl_tcp_frag_user_t),
|
||||||
|
mca_btl_tcp_component.tcp_free_list_num,
|
||||||
|
mca_btl_tcp_component.tcp_free_list_max,
|
||||||
|
mca_btl_tcp_component.tcp_free_list_inc,
|
||||||
|
NULL );
|
||||||
|
|
||||||
|
/* create a BTL TCP module for selected interfaces */
|
||||||
|
if(mca_btl_tcp_component_create_instances() != OMPI_SUCCESS)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* create a TCP listen socket for incoming connection attempts */
|
||||||
|
if(mca_btl_tcp_component_create_listen() != OMPI_SUCCESS)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* publish TCP parameters with the MCA framework */
|
||||||
|
if(mca_btl_tcp_component_exchange() != OMPI_SUCCESS)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
btls = (mca_btl_base_module_t **)malloc(mca_btl_tcp_component.tcp_num_btls *
|
||||||
|
sizeof(mca_btl_base_module_t*));
|
||||||
|
if(NULL == btls)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
memcpy(btls, mca_btl_tcp_component.tcp_btls, mca_btl_tcp_component.tcp_num_btls*sizeof(mca_btl_tcp_module_t*));
|
||||||
|
*num_btl_modules = mca_btl_tcp_component.tcp_num_btls;
|
||||||
|
return btls;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* TCP module control
|
||||||
|
*/
|
||||||
|
|
||||||
|
int mca_btl_tcp_component_control(int param, void* value, size_t size)
|
||||||
|
{
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called by mca_btl_tcp_component_recv() when the TCP listen
|
||||||
|
* socket has pending connection requests. Accept incoming
|
||||||
|
* requests and queue for completion of the connection handshake.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
static void mca_btl_tcp_component_accept(void)
|
||||||
|
{
|
||||||
|
while(true) {
|
||||||
|
ompi_socklen_t addrlen = sizeof(struct sockaddr_in);
|
||||||
|
struct sockaddr_in addr;
|
||||||
|
mca_btl_tcp_event_t *event;
|
||||||
|
int sd = accept(mca_btl_tcp_component.tcp_listen_sd, (struct sockaddr*)&addr, &addrlen);
|
||||||
|
if(sd < 0) {
|
||||||
|
IMPORTANT_WINDOWS_COMMENT();
|
||||||
|
if(ompi_socket_errno == EINTR)
|
||||||
|
continue;
|
||||||
|
if(ompi_socket_errno != EAGAIN || ompi_socket_errno != EWOULDBLOCK)
|
||||||
|
BTL_ERROR(("accept() failed with errno %d.", ompi_socket_errno));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
mca_btl_tcp_set_socket_options(sd);
|
||||||
|
|
||||||
|
/* wait for receipt of peers process identifier to complete this connection */
|
||||||
|
|
||||||
|
event = OBJ_NEW(mca_btl_tcp_event_t);
|
||||||
|
opal_event_set(&event->event, sd, OPAL_EV_READ, mca_btl_tcp_component_recv_handler, event);
|
||||||
|
opal_event_add(&event->event, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Event callback when there is data available on the registered
|
||||||
|
* socket to recv.
|
||||||
|
*/
|
||||||
|
static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user)
|
||||||
|
{
|
||||||
|
orte_process_name_t guid;
|
||||||
|
struct sockaddr_in addr;
|
||||||
|
int retval;
|
||||||
|
mca_btl_tcp_proc_t* btl_proc;
|
||||||
|
ompi_socklen_t addr_len = sizeof(addr);
|
||||||
|
mca_btl_tcp_event_t *event = (mca_btl_tcp_event_t *)user;
|
||||||
|
|
||||||
|
/* accept new connections on the listen socket */
|
||||||
|
if(mca_btl_tcp_component.tcp_listen_sd == sd) {
|
||||||
|
mca_btl_tcp_component_accept();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
OBJ_RELEASE(event);
|
||||||
|
|
||||||
|
/* recv the process identifier */
|
||||||
|
retval = recv(sd, (char *)&guid, sizeof(guid), 0);
|
||||||
|
if(retval != sizeof(guid)) {
|
||||||
|
close(sd);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* now set socket up to be non-blocking */
|
||||||
|
if((flags = fcntl(sd, F_GETFL, 0)) < 0) {
|
||||||
|
BTL_ERROR(("fcntl(F_GETFL) failed with errno=%d", ompi_socket_errno));
|
||||||
|
} else {
|
||||||
|
flags |= O_NONBLOCK;
|
||||||
|
if(fcntl(sd, F_SETFL, flags) < 0) {
|
||||||
|
BTL_ERROR(("fcntl(F_SETFL) failed with errno=%d", ompi_socket_errno));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* lookup the corresponding process */
|
||||||
|
btl_proc = mca_btl_tcp_proc_lookup(&guid);
|
||||||
|
if(NULL == btl_proc) {
|
||||||
|
BTL_ERROR(("errno=%d",errno));
|
||||||
|
close(sd);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* lookup peer address */
|
||||||
|
if(getpeername(sd, (struct sockaddr*)&addr, &addr_len) != 0) {
|
||||||
|
BTL_ERROR(("getpeername() failed with errno=%d", ompi_socket_errno));
|
||||||
|
close(sd);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* are there any existing peer instances will to accept this connection */
|
||||||
|
if(mca_btl_tcp_proc_accept(btl_proc, &addr, sd) == false) {
|
||||||
|
close(sd);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
686
ompi/mca/btl/tcp/btl_tcp_endpoint.c
Обычный файл
686
ompi/mca/btl/tcp/btl_tcp_endpoint.c
Обычный файл
@ -0,0 +1,686 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
* University of Stuttgart. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
* All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#include "ompi_config.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#ifdef HAVE_UNISTD_H
|
||||||
|
#include <unistd.h>
|
||||||
|
#endif
|
||||||
|
#include "include/ompi_socket_errno.h"
|
||||||
|
#ifdef HAVE_SYS_TYPES_H
|
||||||
|
#include <sys/types.h>
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_FCNTL_H
|
||||||
|
#include <fcntl.h>
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_NETINET_IN_H
|
||||||
|
#include <netinet/in.h>
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_NETINET_TCP_H
|
||||||
|
#include <netinet/tcp.h>
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_ARPA_INET_H
|
||||||
|
#include <arpa/inet.h>
|
||||||
|
#endif
|
||||||
|
#include <sys/time.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include "include/types.h"
|
||||||
|
#include "mca/ns/base/base.h"
|
||||||
|
#include "mca/oob/base/base.h"
|
||||||
|
#include "mca/rml/rml.h"
|
||||||
|
#include "mca/errmgr/errmgr.h"
|
||||||
|
#include "mca/btl/base/btl_base_error.h"
|
||||||
|
#include "dps/dps.h"
|
||||||
|
#include "btl_tcp.h"
|
||||||
|
#include "btl_tcp_endpoint.h"
|
||||||
|
#include "btl_tcp_proc.h"
|
||||||
|
#include "btl_tcp_frag.h"
|
||||||
|
#include "btl_tcp_addr.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialize state of the endpoint instance.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void mca_btl_tcp_endpoint_construct(mca_btl_tcp_endpoint_t* endpoint)
|
||||||
|
{
|
||||||
|
endpoint->endpoint_btl = NULL;
|
||||||
|
endpoint->endpoint_proc = NULL;
|
||||||
|
endpoint->endpoint_addr = NULL;
|
||||||
|
endpoint->endpoint_sd = -1;
|
||||||
|
endpoint->endpoint_send_frag = 0;
|
||||||
|
endpoint->endpoint_recv_frag = 0;
|
||||||
|
endpoint->endpoint_send_event.ev_flags = 0;
|
||||||
|
endpoint->endpoint_recv_event.ev_flags = 0;
|
||||||
|
endpoint->endpoint_state = MCA_BTL_TCP_CLOSED;
|
||||||
|
endpoint->endpoint_retries = 0;
|
||||||
|
endpoint->endpoint_nbo = false;
|
||||||
|
OBJ_CONSTRUCT(&endpoint->endpoint_frags, opal_list_t);
|
||||||
|
OBJ_CONSTRUCT(&endpoint->endpoint_send_lock, opal_mutex_t);
|
||||||
|
OBJ_CONSTRUCT(&endpoint->endpoint_recv_lock, opal_mutex_t);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Destructor for an endpoint instance.
 *
 * The endpoint is first removed from its proc, then its connection is
 * closed, and finally the embedded list and locks are destructed.
 */

static void mca_btl_tcp_endpoint_destruct(mca_btl_tcp_endpoint_t* endpoint)
{
    mca_btl_tcp_proc_remove(endpoint->endpoint_proc, endpoint);
    /* deletes the socket's events and closes the descriptor if one is open */
    mca_btl_tcp_endpoint_close(endpoint);
    OBJ_DESTRUCT(&endpoint->endpoint_frags);
    OBJ_DESTRUCT(&endpoint->endpoint_send_lock);
    OBJ_DESTRUCT(&endpoint->endpoint_recv_lock);
}
|
||||||
|
|
||||||
|
/*
 * Class instance for mca_btl_tcp_endpoint_t: parent class opal_list_item_t,
 * with the constructor and destructor defined above.
 */
OBJ_CLASS_INSTANCE(
    mca_btl_tcp_endpoint_t,
    opal_list_item_t,
    mca_btl_tcp_endpoint_construct,
    mca_btl_tcp_endpoint_destruct);
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Marker macro: its replacement text consists only of a comment, so it
 * expands to nothing.  It is placed next to the errno checks that test for
 * EWOULDBLOCK to explain why that value is tested there.
 */
#define IMPORTANT_WINDOWS_COMMENT() \
    /* In windows, many of the socket functions return an EWOULDBLOCK instead of \
       things like EAGAIN, EINPROGRESS, etc. It has been verified that this will \
       not conflict with other error codes that are returned by these functions \
       under UNIX/Linux environments */
|
||||||
|
|
||||||
|
/*
 * Prototypes for file-local functions.  The constructor and destructor are
 * already defined above; the remaining functions are defined further down
 * and are referenced before their definitions.
 */
static void mca_btl_tcp_endpoint_construct(mca_btl_base_endpoint_t* btl_endpoint);
static void mca_btl_tcp_endpoint_destruct(mca_btl_base_endpoint_t* btl_endpoint);
static int  mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t*);
static void mca_btl_tcp_endpoint_connected(mca_btl_base_endpoint_t*);
static void mca_btl_tcp_endpoint_recv_handler(int sd, short flags, void* user);
static void mca_btl_tcp_endpoint_send_handler(int sd, short flags, void* user);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Diagnostics: change this to "1" to enable the function
|
||||||
|
* mca_btl_tcp_endpoint_dump(), below
|
||||||
|
*/
|
||||||
|
#define WANT_PEER_DUMP 0
|
||||||
|
/*
 * Diagnostics: log the local and peer address of an endpoint's socket
 * together with its TCP_NODELAY, SO_SNDBUF, SO_RCVBUF and fcntl() flag
 * settings.  Only compiled when WANT_PEER_DUMP is non-zero.
 *
 * btl_endpoint: endpoint whose socket (endpoint_sd) is inspected
 * msg:          text placed at the start of the logged line
 *
 * Every value has a defined fallback (-1 for the buffer sizes, 0 for
 * nodelay, a zeroed address) so that a failing query never causes
 * uninitialised data to be logged.
 */

#if WANT_PEER_DUMP
static void mca_btl_tcp_endpoint_dump(mca_btl_base_endpoint_t* btl_endpoint, const char* msg)
{
    char src[64];
    char dst[64];
    int sndbuf = -1;
    int rcvbuf = -1;
    int nodelay = 0;
    int flags;
    struct sockaddr_in inaddr;
    ompi_socklen_t obtlen;
    ompi_socklen_t addrlen;

    /* local address; inet_ntoa() yields a dotted quad, which fits easily in 64 bytes */
    memset(&inaddr, 0, sizeof(inaddr));
    addrlen = sizeof(inaddr);
    getsockname(btl_endpoint->endpoint_sd, (struct sockaddr*)&inaddr, &addrlen);
    sprintf(src, "%s", inet_ntoa(inaddr.sin_addr));

    /* peer address; addrlen is value-result, so reset it before reuse */
    memset(&inaddr, 0, sizeof(inaddr));
    addrlen = sizeof(inaddr);
    getpeername(btl_endpoint->endpoint_sd, (struct sockaddr*)&inaddr, &addrlen);
    sprintf(dst, "%s", inet_ntoa(inaddr.sin_addr));

    if((flags = fcntl(btl_endpoint->endpoint_sd, F_GETFL, 0)) < 0) {
        BTL_ERROR(("fcntl(F_GETFL) failed with errno=%d", ompi_socket_errno));
    }

#if defined(SO_SNDBUF)
    obtlen = sizeof(sndbuf);
    if(getsockopt(btl_endpoint->endpoint_sd, SOL_SOCKET, SO_SNDBUF, (char *)&sndbuf, &obtlen) < 0) {
        BTL_ERROR(("SO_SNDBUF option: errno %d", ompi_socket_errno));
    }
#endif
#if defined(SO_RCVBUF)
    obtlen = sizeof(rcvbuf);
    if(getsockopt(btl_endpoint->endpoint_sd, SOL_SOCKET, SO_RCVBUF, (char *)&rcvbuf, &obtlen) < 0) {
        BTL_ERROR(("SO_RCVBUF option: errno %d", ompi_socket_errno));
    }
#endif
#if defined(TCP_NODELAY)
    obtlen = sizeof(nodelay);
    if(getsockopt(btl_endpoint->endpoint_sd, IPPROTO_TCP, TCP_NODELAY, (char *)&nodelay, &obtlen) < 0) {
        BTL_ERROR(("TCP_NODELAY option: errno %d", ompi_socket_errno));
    }
#endif

    BTL_DEBUG(("%s: %s - %s nodelay %d sndbuf %d rcvbuf %d flags %08x",
        msg, src, dst, nodelay, sndbuf, rcvbuf, flags));
}
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialize events to be used by the endpoint instance for TCP select/poll callbacks.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static inline void mca_btl_tcp_endpoint_event_init(mca_btl_base_endpoint_t* btl_endpoint, int sd)
|
||||||
|
{
|
||||||
|
opal_event_set(
|
||||||
|
&btl_endpoint->endpoint_recv_event,
|
||||||
|
btl_endpoint->endpoint_sd,
|
||||||
|
OPAL_EV_READ|OPAL_EV_PERSIST,
|
||||||
|
mca_btl_tcp_endpoint_recv_handler,
|
||||||
|
btl_endpoint);
|
||||||
|
opal_event_set(
|
||||||
|
&btl_endpoint->endpoint_send_event,
|
||||||
|
btl_endpoint->endpoint_sd,
|
||||||
|
OPAL_EV_WRITE|OPAL_EV_PERSIST,
|
||||||
|
mca_btl_tcp_endpoint_send_handler,
|
||||||
|
btl_endpoint);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Attempt to send a fragment using a given endpoint. If the endpoint is not connected,
|
||||||
|
* queue the fragment and start the connection as required.
|
||||||
|
*/
|
||||||
|
|
||||||
|
int mca_btl_tcp_endpoint_send(mca_btl_base_endpoint_t* btl_endpoint, mca_btl_tcp_frag_t* frag)
|
||||||
|
{
|
||||||
|
int rc = OMPI_SUCCESS;
|
||||||
|
OPAL_THREAD_LOCK(&btl_endpoint->endpoint_send_lock);
|
||||||
|
switch(btl_endpoint->endpoint_state) {
|
||||||
|
case MCA_BTL_TCP_CONNECTING:
|
||||||
|
case MCA_BTL_TCP_CONNECT_ACK:
|
||||||
|
case MCA_BTL_TCP_CLOSED:
|
||||||
|
opal_list_append(&btl_endpoint->endpoint_frags, (opal_list_item_t*)frag);
|
||||||
|
if(btl_endpoint->endpoint_state == MCA_BTL_TCP_CLOSED)
|
||||||
|
rc = mca_btl_tcp_endpoint_start_connect(btl_endpoint);
|
||||||
|
break;
|
||||||
|
case MCA_BTL_TCP_FAILED:
|
||||||
|
rc = OMPI_ERR_UNREACH;
|
||||||
|
break;
|
||||||
|
case MCA_BTL_TCP_CONNECTED:
|
||||||
|
if (NULL != btl_endpoint->endpoint_send_frag) {
|
||||||
|
opal_list_append(&btl_endpoint->endpoint_frags, (opal_list_item_t*)frag);
|
||||||
|
} else {
|
||||||
|
if(mca_btl_tcp_frag_send(frag, btl_endpoint->endpoint_sd)) {
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
|
||||||
|
frag->base.des_cbfunc(&frag->btl->super, frag->endpoint, &frag->base, frag->rc);
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
} else {
|
||||||
|
btl_endpoint->endpoint_send_frag = frag;
|
||||||
|
opal_event_add(&btl_endpoint->endpoint_send_event, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case MCA_BTL_TCP_SHUTDOWN:
|
||||||
|
rc = OMPI_ERROR;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A blocking send on a non-blocking socket. Used to send the small amount of connection
|
||||||
|
* information that identifies the endpoints endpoint.
|
||||||
|
*/
|
||||||
|
static int mca_btl_tcp_endpoint_send_blocking(mca_btl_base_endpoint_t* btl_endpoint, void* data, size_t size)
|
||||||
|
{
|
||||||
|
unsigned char* ptr = (unsigned char*)data;
|
||||||
|
size_t cnt = 0;
|
||||||
|
while(cnt < size) {
|
||||||
|
int retval = send(btl_endpoint->endpoint_sd, (const char *)ptr+cnt, size-cnt, 0);
|
||||||
|
if(retval < 0) {
|
||||||
|
IMPORTANT_WINDOWS_COMMENT();
|
||||||
|
if(ompi_socket_errno != EINTR && ompi_socket_errno != EAGAIN && ompi_socket_errno != EWOULDBLOCK) {
|
||||||
|
BTL_ERROR(("send() failed with errno=%d",ompi_socket_errno));
|
||||||
|
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
cnt += retval;
|
||||||
|
}
|
||||||
|
return cnt;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Send the globally unique identifier for this process to a endpoint on
|
||||||
|
* a newly connected socket.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int mca_btl_tcp_endpoint_send_connect_ack(mca_btl_base_endpoint_t* btl_endpoint)
|
||||||
|
{
|
||||||
|
/* send process identifier to remote endpoint */
|
||||||
|
mca_btl_tcp_proc_t* btl_proc = mca_btl_tcp_proc_local();
|
||||||
|
if(mca_btl_tcp_endpoint_send_blocking(btl_endpoint, &btl_proc->proc_name, sizeof(btl_proc->proc_name)) !=
|
||||||
|
sizeof(btl_proc->proc_name)) {
|
||||||
|
return OMPI_ERR_UNREACH;
|
||||||
|
}
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check the state of this endpoint. If the incoming connection request matches
|
||||||
|
* our endpoints address, check the state of our connection:
|
||||||
|
* (1) if a connection has not been attempted, accept the connection
|
||||||
|
* (2) if a connection has not been established, and the endpoints process identifier
|
||||||
|
* is less than the local process, accept the connection
|
||||||
|
* otherwise, reject the connection and continue with the current connection
|
||||||
|
*/
|
||||||
|
|
||||||
|
bool mca_btl_tcp_endpoint_accept(mca_btl_base_endpoint_t* btl_endpoint, struct sockaddr_in* addr, int sd)
|
||||||
|
{
|
||||||
|
mca_btl_tcp_addr_t* btl_addr;
|
||||||
|
mca_btl_tcp_proc_t* this_proc = mca_btl_tcp_proc_local();
|
||||||
|
orte_ns_cmp_bitmask_t mask = ORTE_NS_CMP_ALL;
|
||||||
|
int cmpval;
|
||||||
|
|
||||||
|
OPAL_THREAD_LOCK(&btl_endpoint->endpoint_recv_lock);
|
||||||
|
OPAL_THREAD_LOCK(&btl_endpoint->endpoint_send_lock);
|
||||||
|
if((btl_addr = btl_endpoint->endpoint_addr) != NULL &&
|
||||||
|
btl_addr->addr_inet.s_addr == addr->sin_addr.s_addr) {
|
||||||
|
mca_btl_tcp_proc_t *endpoint_proc = btl_endpoint->endpoint_proc;
|
||||||
|
cmpval = orte_ns.compare(mask,
|
||||||
|
&endpoint_proc->proc_ompi->proc_name,
|
||||||
|
&this_proc->proc_ompi->proc_name);
|
||||||
|
if((btl_endpoint->endpoint_sd < 0) ||
|
||||||
|
(btl_endpoint->endpoint_state != MCA_BTL_TCP_CONNECTED &&
|
||||||
|
cmpval < 0)) {
|
||||||
|
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||||
|
btl_endpoint->endpoint_sd = sd;
|
||||||
|
if(mca_btl_tcp_endpoint_send_connect_ack(btl_endpoint) != OMPI_SUCCESS) {
|
||||||
|
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
mca_btl_tcp_endpoint_event_init(btl_endpoint, sd);
|
||||||
|
opal_event_add(&btl_endpoint->endpoint_recv_event, 0);
|
||||||
|
mca_btl_tcp_endpoint_connected(btl_endpoint);
|
||||||
|
#if OMPI_ENABLE_DEBUG && WANT_PEER_DUMP
|
||||||
|
mca_btl_tcp_endpoint_dump(btl_endpoint, "accepted");
|
||||||
|
#endif
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Remove any event registrations associated with the socket
|
||||||
|
* and update the endpoint state to reflect the connection has
|
||||||
|
* been closed.
|
||||||
|
*/
|
||||||
|
|
||||||
|
void mca_btl_tcp_endpoint_close(mca_btl_base_endpoint_t* btl_endpoint)
|
||||||
|
{
|
||||||
|
if(btl_endpoint->endpoint_sd >= 0) {
|
||||||
|
opal_event_del(&btl_endpoint->endpoint_recv_event);
|
||||||
|
opal_event_del(&btl_endpoint->endpoint_send_event);
|
||||||
|
close(btl_endpoint->endpoint_sd);
|
||||||
|
btl_endpoint->endpoint_sd = -1;
|
||||||
|
}
|
||||||
|
btl_endpoint->endpoint_state = MCA_BTL_TCP_CLOSED;
|
||||||
|
btl_endpoint->endpoint_retries++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Close the endpoint's connection and mark the endpoint as shut down.
 * Both endpoint locks are held while doing so (recv lock first, then the
 * send lock; released in reverse order).
 */
void mca_btl_tcp_endpoint_shutdown(mca_btl_base_endpoint_t* btl_endpoint)
{
    OPAL_THREAD_LOCK(&btl_endpoint->endpoint_recv_lock);
    OPAL_THREAD_LOCK(&btl_endpoint->endpoint_send_lock);
    mca_btl_tcp_endpoint_close(btl_endpoint);
    /* close() leaves the state at MCA_BTL_TCP_CLOSED; override it */
    btl_endpoint->endpoint_state = MCA_BTL_TCP_SHUTDOWN;
    OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
    OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Setup endpoint state to reflect that connection has been established,
|
||||||
|
* and start any pending sends.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void mca_btl_tcp_endpoint_connected(mca_btl_base_endpoint_t* btl_endpoint)
|
||||||
|
{
|
||||||
|
/* setup socket options */
|
||||||
|
btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECTED;
|
||||||
|
btl_endpoint->endpoint_retries = 0;
|
||||||
|
if(opal_list_get_size(&btl_endpoint->endpoint_frags) > 0) {
|
||||||
|
if(NULL == btl_endpoint->endpoint_send_frag)
|
||||||
|
btl_endpoint->endpoint_send_frag = (mca_btl_tcp_frag_t*)
|
||||||
|
opal_list_remove_first(&btl_endpoint->endpoint_frags);
|
||||||
|
opal_event_add(&btl_endpoint->endpoint_send_event, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A blocking recv on a non-blocking socket. Used to receive the small amount of connection
|
||||||
|
* information that identifies the endpoints endpoint.
|
||||||
|
*/
|
||||||
|
static int mca_btl_tcp_endpoint_recv_blocking(mca_btl_base_endpoint_t* btl_endpoint, void* data, size_t size)
|
||||||
|
{
|
||||||
|
unsigned char* ptr = (unsigned char*)data;
|
||||||
|
size_t cnt = 0;
|
||||||
|
while(cnt < size) {
|
||||||
|
int retval = recv(btl_endpoint->endpoint_sd, (char *)ptr+cnt, size-cnt, 0);
|
||||||
|
|
||||||
|
/* remote closed connection */
|
||||||
|
if(retval == 0) {
|
||||||
|
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* socket is non-blocking so handle errors */
|
||||||
|
if(retval < 0) {
|
||||||
|
IMPORTANT_WINDOWS_COMMENT();
|
||||||
|
if(ompi_socket_errno != EINTR && ompi_socket_errno != EAGAIN && ompi_socket_errno != EWOULDBLOCK) {
|
||||||
|
BTL_ERROR(("recv() failed with errno=%d",ompi_socket_errno));
|
||||||
|
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
cnt += retval;
|
||||||
|
}
|
||||||
|
return cnt;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Receive the endpoints globally unique process identification from a newly
|
||||||
|
* connected socket and verify the expected response. If so, move the
|
||||||
|
* socket to a connected state.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_endpoint)
|
||||||
|
{
|
||||||
|
orte_process_name_t guid;
|
||||||
|
mca_btl_tcp_proc_t* btl_proc = btl_endpoint->endpoint_proc;
|
||||||
|
|
||||||
|
if((mca_btl_tcp_endpoint_recv_blocking(btl_endpoint, &guid, sizeof(orte_process_name_t))) != sizeof(orte_process_name_t)) {
|
||||||
|
return OMPI_ERR_UNREACH;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* compare this to the expected values */
|
||||||
|
if(memcmp(&btl_proc->proc_name, &guid, sizeof(orte_process_name_t)) != 0) {
|
||||||
|
BTL_ERROR(("received unexpected process identifier [%lu,%lu,%lu]",
|
||||||
|
ORTE_NAME_ARGS(&guid)));
|
||||||
|
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||||
|
return OMPI_ERR_UNREACH;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* connected */
|
||||||
|
mca_btl_tcp_endpoint_connected(btl_endpoint);
|
||||||
|
#if OMPI_ENABLE_DEBUG && WANT_PEER_DUMP
|
||||||
|
mca_btl_tcp_endpoint_dump(btl_endpoint, "connected");
|
||||||
|
#endif
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void mca_btl_tcp_set_socket_options(int sd)
|
||||||
|
{
|
||||||
|
int optval;
|
||||||
|
#if defined(TCP_NODELAY)
|
||||||
|
optval = 1;
|
||||||
|
if(setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *)&optval, sizeof(optval)) < 0) {
|
||||||
|
BTL_ERROR(("setsockopt(TCP_NODELAY) failed with errno=%d", ompi_socket_errno));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if defined(SO_SNDBUF)
|
||||||
|
if(mca_btl_tcp_component.tcp_sndbuf > 0 &&
|
||||||
|
setsockopt(sd, SOL_SOCKET, SO_SNDBUF, (char *)&mca_btl_tcp_component.tcp_sndbuf, sizeof(int)) < 0) {
|
||||||
|
BTL_ERROR(("setsockopt(SO_SNDBUF) failed with errno %d", ompi_socket_errno));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if defined(SO_RCVBUF)
|
||||||
|
if(mca_btl_tcp_component.tcp_rcvbuf > 0 &&
|
||||||
|
setsockopt(sd, SOL_SOCKET, SO_RCVBUF, (char *)&mca_btl_tcp_component.tcp_rcvbuf, sizeof(int)) < 0) {
|
||||||
|
BTL_ERROR(("setsockopt(SO_RCVBUF) failed with errno %d", ompi_socket_errno));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Start a connection to the endpoint. This will likely not complete,
|
||||||
|
* as the socket is set to non-blocking, so register for event
|
||||||
|
* notification of connect completion. On connection we send
|
||||||
|
* our globally unique process identifier to the endpoint and wait for
|
||||||
|
* the endpoints response.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endpoint)
|
||||||
|
{
|
||||||
|
int rc,flags;
|
||||||
|
struct sockaddr_in endpoint_addr;
|
||||||
|
|
||||||
|
btl_endpoint->endpoint_sd = socket(AF_INET, SOCK_STREAM, 0);
|
||||||
|
if (btl_endpoint->endpoint_sd < 0) {
|
||||||
|
btl_endpoint->endpoint_retries++;
|
||||||
|
return OMPI_ERR_UNREACH;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* setup socket buffer sizes */
|
||||||
|
mca_btl_tcp_set_socket_options(btl_endpoint->endpoint_sd);
|
||||||
|
|
||||||
|
/* setup event callbacks */
|
||||||
|
mca_btl_tcp_endpoint_event_init(btl_endpoint, btl_endpoint->endpoint_sd);
|
||||||
|
|
||||||
|
/* setup the socket as non-blocking */
|
||||||
|
if((flags = fcntl(btl_endpoint->endpoint_sd, F_GETFL, 0)) < 0) {
|
||||||
|
BTL_ERROR(("fcntl(F_GETFL) failed with errno=%d", ompi_socket_errno));
|
||||||
|
} else {
|
||||||
|
flags |= O_NONBLOCK;
|
||||||
|
if(fcntl(btl_endpoint->endpoint_sd, F_SETFL, flags) < 0)
|
||||||
|
BTL_ERROR(("fcntl(F_SETFL) failed with errno=%d", ompi_socket_errno));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* start the connect - will likely fail with EINPROGRESS */
|
||||||
|
endpoint_addr.sin_family = AF_INET;
|
||||||
|
endpoint_addr.sin_addr = btl_endpoint->endpoint_addr->addr_inet;
|
||||||
|
endpoint_addr.sin_port = btl_endpoint->endpoint_addr->addr_port;
|
||||||
|
if(connect(btl_endpoint->endpoint_sd, (struct sockaddr*)&endpoint_addr, sizeof(endpoint_addr)) < 0) {
|
||||||
|
/* non-blocking so wait for completion */
|
||||||
|
IMPORTANT_WINDOWS_COMMENT();
|
||||||
|
if(ompi_socket_errno == EINPROGRESS || ompi_socket_errno == EWOULDBLOCK) {
|
||||||
|
btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECTING;
|
||||||
|
opal_event_add(&btl_endpoint->endpoint_send_event, 0);
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||||
|
btl_endpoint->endpoint_retries++;
|
||||||
|
return OMPI_ERR_UNREACH;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* send our globally unique process identifier to the endpoint */
|
||||||
|
if((rc = mca_btl_tcp_endpoint_send_connect_ack(btl_endpoint)) == OMPI_SUCCESS) {
|
||||||
|
btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECT_ACK;
|
||||||
|
opal_event_add(&btl_endpoint->endpoint_recv_event, 0);
|
||||||
|
} else {
|
||||||
|
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||||
|
}
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check the status of the connection. If the connection failed, will retry
|
||||||
|
* later. Otherwise, send this processes identifier to the endpoint on the
|
||||||
|
* newly connected socket.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_endpoint)
|
||||||
|
{
|
||||||
|
int so_error = 0;
|
||||||
|
ompi_socklen_t so_length = sizeof(so_error);
|
||||||
|
|
||||||
|
/* unregister from receiving event notifications */
|
||||||
|
opal_event_del(&btl_endpoint->endpoint_send_event);
|
||||||
|
|
||||||
|
/* check connect completion status */
|
||||||
|
if(getsockopt(btl_endpoint->endpoint_sd, SOL_SOCKET, SO_ERROR, (char *)&so_error, &so_length) < 0) {
|
||||||
|
BTL_ERROR(("getsockopt() failed with errno=%d", ompi_socket_errno));
|
||||||
|
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
IMPORTANT_WINDOWS_COMMENT();
|
||||||
|
if(so_error == EINPROGRESS || so_error == EWOULDBLOCK) {
|
||||||
|
opal_event_add(&btl_endpoint->endpoint_send_event, 0);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if(so_error != 0) {
|
||||||
|
BTL_ERROR(("connect() failed with errno=%d", so_error));
|
||||||
|
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(mca_btl_tcp_endpoint_send_connect_ack(btl_endpoint) == OMPI_SUCCESS) {
|
||||||
|
btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECT_ACK;
|
||||||
|
opal_event_add(&btl_endpoint->endpoint_recv_event, 0);
|
||||||
|
} else {
|
||||||
|
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A file descriptor is available/ready for recv. Check the state
|
||||||
|
* of the socket and take the appropriate action.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void mca_btl_tcp_endpoint_recv_handler(int sd, short flags, void* user)
|
||||||
|
{
|
||||||
|
mca_btl_base_endpoint_t* btl_endpoint = (mca_btl_base_endpoint_t *)user;
|
||||||
|
OPAL_THREAD_LOCK(&btl_endpoint->endpoint_recv_lock);
|
||||||
|
switch(btl_endpoint->endpoint_state) {
|
||||||
|
case MCA_BTL_TCP_CONNECT_ACK:
|
||||||
|
{
|
||||||
|
mca_btl_tcp_endpoint_recv_connect_ack(btl_endpoint);
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case MCA_BTL_TCP_CONNECTED:
|
||||||
|
{
|
||||||
|
mca_btl_tcp_frag_t* frag = btl_endpoint->endpoint_recv_frag;
|
||||||
|
if(NULL == frag) {
|
||||||
|
int rc;
|
||||||
|
MCA_BTL_TCP_FRAG_ALLOC_MAX(frag, rc);
|
||||||
|
if(NULL == frag) {
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
MCA_BTL_TCP_FRAG_INIT_DST(frag, btl_endpoint);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check for completion of non-blocking recv on the current fragment */
|
||||||
|
if(mca_btl_tcp_frag_recv(frag, sd) == false) {
|
||||||
|
btl_endpoint->endpoint_recv_frag = frag;
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
|
||||||
|
} else {
|
||||||
|
btl_endpoint->endpoint_recv_frag = NULL;
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
|
||||||
|
switch(frag->hdr.type) {
|
||||||
|
case MCA_BTL_TCP_HDR_TYPE_SEND:
|
||||||
|
{
|
||||||
|
mca_btl_base_recv_reg_t* reg = frag->btl->tcp_reg + frag->hdr.base.tag;
|
||||||
|
reg->cbfunc(&frag->btl->super, frag->hdr.base.tag, &frag->base, reg->cbdata);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MCA_BTL_TCP_FRAG_RETURN_MAX(frag);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case MCA_BTL_TCP_SHUTDOWN:
|
||||||
|
{
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
|
||||||
|
BTL_ERROR(("invalid socket state(%d)", btl_endpoint->endpoint_state));
|
||||||
|
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A file descriptor is available/ready for send. Check the state
|
||||||
|
* of the socket and take the appropriate action.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void mca_btl_tcp_endpoint_send_handler(int sd, short flags, void* user)
|
||||||
|
{
|
||||||
|
mca_btl_tcp_endpoint_t* btl_endpoint = (mca_btl_tcp_endpoint_t *)user;
|
||||||
|
OPAL_THREAD_LOCK(&btl_endpoint->endpoint_send_lock);
|
||||||
|
switch(btl_endpoint->endpoint_state) {
|
||||||
|
case MCA_BTL_TCP_CONNECTING:
|
||||||
|
mca_btl_tcp_endpoint_complete_connect(btl_endpoint);
|
||||||
|
break;
|
||||||
|
case MCA_BTL_TCP_CONNECTED:
|
||||||
|
{
|
||||||
|
/* complete the current send */
|
||||||
|
do {
|
||||||
|
mca_btl_tcp_frag_t* frag = btl_endpoint->endpoint_send_frag;
|
||||||
|
if(mca_btl_tcp_frag_send(frag, btl_endpoint->endpoint_sd) == false) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if required - update request status and release fragment */
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
|
||||||
|
frag->base.des_cbfunc(&frag->btl->super, frag->endpoint, &frag->base, frag->rc);
|
||||||
|
OPAL_THREAD_LOCK(&btl_endpoint->endpoint_send_lock);
|
||||||
|
|
||||||
|
/* progress any pending sends */
|
||||||
|
btl_endpoint->endpoint_send_frag = (mca_btl_tcp_frag_t*)
|
||||||
|
opal_list_remove_first(&btl_endpoint->endpoint_frags);
|
||||||
|
} while (NULL != btl_endpoint->endpoint_send_frag);
|
||||||
|
|
||||||
|
/* if nothing else to do unregister for send event notifications */
|
||||||
|
if(NULL == btl_endpoint->endpoint_send_frag) {
|
||||||
|
opal_event_del(&btl_endpoint->endpoint_send_event);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
BTL_ERROR(("invalid connection state (%d)",
|
||||||
|
btl_endpoint->endpoint_state));
|
||||||
|
opal_event_del(&btl_endpoint->endpoint_send_event);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
83
ompi/mca/btl/tcp/btl_tcp_endpoint.h
Обычный файл
83
ompi/mca/btl/tcp/btl_tcp_endpoint.h
Обычный файл
@ -0,0 +1,83 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
* University of Stuttgart. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
* All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MCA_BTL_TCP_ENDPOINT_H
|
||||||
|
#define MCA_BTL_TCP_ENDPOINT_H
|
||||||
|
|
||||||
|
#include "opal/class/opal_list.h"
|
||||||
|
#include "opal/event/event.h"
|
||||||
|
#include "ompi/mca/pml/pml.h"
|
||||||
|
#include "ompi/mca/btl/btl.h"
|
||||||
|
#include "btl_tcp_frag.h"
|
||||||
|
#include "btl_tcp.h"
|
||||||
|
#if defined(c_plusplus) || defined(__cplusplus)
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
OBJ_CLASS_DECLARATION(mca_btl_tcp_endpoint_t);
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * State of TCP endpoint connection.
 */

typedef enum {
    MCA_BTL_TCP_CLOSED,       /**< no connection; the state after construction and after a close */
    MCA_BTL_TCP_CONNECTING,   /**< non-blocking connect() started, completion pending */
    MCA_BTL_TCP_CONNECT_ACK,  /**< local identifier sent, waiting for the peer's identifier */
    MCA_BTL_TCP_CONNECTED,    /**< connection established; fragments can be exchanged */
    MCA_BTL_TCP_SHUTDOWN,     /**< endpoint has been shut down; sends are rejected */
    MCA_BTL_TCP_FAILED        /**< endpoint is unreachable; sends are rejected */
} mca_btl_tcp_state_t;
|
||||||
|
|
||||||
|
/**
 * An abstraction that represents a connection to an endpoint process.
 * An instance of mca_btl_base_endpoint_t is associated w/ each process
 * and BTL pair at startup. However, connections to the endpoint
 * are established dynamically on an as-needed basis.
 */

struct mca_btl_base_endpoint_t {
    opal_list_item_t            super;
    struct mca_btl_tcp_module_t* endpoint_btl;   /**< BTL instance that created this connection */
    struct mca_btl_tcp_proc_t*   endpoint_proc;  /**< proc structure corresponding to endpoint */
    struct mca_btl_tcp_addr_t*   endpoint_addr;  /**< address of endpoint */
    int                          endpoint_sd;    /**< socket connection to endpoint, -1 when none is open */
    struct mca_btl_tcp_frag_t*   endpoint_send_frag; /**< current send frag being processed */
    struct mca_btl_tcp_frag_t*   endpoint_recv_frag; /**< current recv frag being processed */
    mca_btl_tcp_state_t          endpoint_state;     /**< current state of the connection */
    size_t                       endpoint_retries;   /**< number of connection retries attempted */
    opal_list_t                  endpoint_frags;     /**< list of pending frags to send */
    opal_mutex_t                 endpoint_send_lock; /**< lock for concurrent access to endpoint state (send side) */
    opal_mutex_t                 endpoint_recv_lock; /**< lock for concurrent access to endpoint state (recv side) */
    opal_event_t                 endpoint_send_event; /**< event for async processing of send frags */
    opal_event_t                 endpoint_recv_event; /**< event for async processing of recv frags */
    bool                         endpoint_nbo;       /**< convert headers to network byte order? */
};
|
||||||
|
|
||||||
|
/* the TCP endpoint type is the BTL base endpoint type under another name */
typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
typedef mca_btl_base_endpoint_t  mca_btl_tcp_endpoint_t;

/** Apply the component's TCP_NODELAY / SO_SNDBUF / SO_RCVBUF settings to a socket. */
void mca_btl_tcp_set_socket_options(int sd);
/** Delete the endpoint's events, close its socket and mark it closed. */
void mca_btl_tcp_endpoint_close(mca_btl_base_endpoint_t*);
/** Send a fragment, or queue it (starting a connection if necessary). */
int  mca_btl_tcp_endpoint_send(mca_btl_base_endpoint_t*, struct mca_btl_tcp_frag_t*);
/** Offer an incoming connection to the endpoint; returns true if it was taken over. */
bool mca_btl_tcp_endpoint_accept(mca_btl_base_endpoint_t*, struct sockaddr_in*, int);
/** Close the endpoint's connection and mark it as shut down. */
void mca_btl_tcp_endpoint_shutdown(mca_btl_base_endpoint_t*);
|
||||||
|
|
||||||
|
#if defined(c_plusplus) || defined(__cplusplus)
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif
|
170
ompi/mca/btl/tcp/btl_tcp_frag.c
Обычный файл
170
ompi/mca/btl/tcp/btl_tcp_frag.c
Обычный файл
@ -0,0 +1,170 @@
|
|||||||
|
#include "ompi_config.h"
|
||||||
|
#include "include/ompi_socket_errno.h"
|
||||||
|
#include "ompi/mca/btl/base/btl_base_error.h"
|
||||||
|
#include "btl_tcp_frag.h"
|
||||||
|
#include "btl_tcp_endpoint.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Construction step shared by every fragment class: the descriptor starts
 * out with no source and no destination segments.
 */
static void mca_btl_tcp_frag_common_constructor(mca_btl_tcp_frag_t* frag)
{
    /* destination side */
    frag->base.des_dst_cnt = 0;
    frag->base.des_dst     = NULL;

    /* source side */
    frag->base.des_src_cnt = 0;
    frag->base.des_src     = NULL;
}
|
||||||
|
|
||||||
|
/* Constructor for eager fragments: size is the module's eager limit. */
static void mca_btl_tcp_frag_eager_constructor(mca_btl_tcp_frag_t* frag)
{
    mca_btl_tcp_frag_common_constructor(frag);
    frag->size = mca_btl_tcp_module.super.btl_eager_limit;
}
|
||||||
|
|
||||||
|
/* Constructor for max-size fragments: size is the module's maximum send size. */
static void mca_btl_tcp_frag_max_constructor(mca_btl_tcp_frag_t* frag)
{
    mca_btl_tcp_frag_common_constructor(frag);
    frag->size = mca_btl_tcp_module.super.btl_max_send_size;
}
|
||||||
|
|
||||||
|
/* Constructor for user fragments: the fragment size is recorded as zero. */
static void mca_btl_tcp_frag_user_constructor(mca_btl_tcp_frag_t* frag)
{
    mca_btl_tcp_frag_common_constructor(frag);
    frag->size = 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
OBJ_CLASS_INSTANCE(
|
||||||
|
mca_btl_tcp_frag_t,
|
||||||
|
mca_btl_base_descriptor_t,
|
||||||
|
NULL,
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
OBJ_CLASS_INSTANCE(
|
||||||
|
mca_btl_tcp_frag_eager_t,
|
||||||
|
mca_btl_base_descriptor_t,
|
||||||
|
mca_btl_tcp_frag_eager_constructor,
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
OBJ_CLASS_INSTANCE(
|
||||||
|
mca_btl_tcp_frag_max_t,
|
||||||
|
mca_btl_base_descriptor_t,
|
||||||
|
mca_btl_tcp_frag_max_constructor,
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
OBJ_CLASS_INSTANCE(
|
||||||
|
mca_btl_tcp_frag_user_t,
|
||||||
|
mca_btl_base_descriptor_t,
|
||||||
|
mca_btl_tcp_frag_user_constructor,
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Write as much of the fragment's pending iovec array as the socket accepts.
 *
 * @param frag  fragment whose iov_ptr / iov_cnt describe the bytes still to
 *              be sent; both are advanced past whatever was written
 * @param sd    socket descriptor to write to
 * @return true once every iovec has been written completely; false if the
 *         write would block, was only partial, or failed.  On a failure
 *         other than EINTR / EWOULDBLOCK the endpoint is closed.
 */
bool mca_btl_tcp_frag_send(mca_btl_tcp_frag_t* frag, int sd)
{
    int cnt = -1;
    size_t i, num_vecs;

    /* non-blocking write, but retry if interrupted */
    while(cnt < 0) {
        cnt = writev(sd, frag->iov_ptr, frag->iov_cnt);
        if(cnt < 0) {
            /* capture errno now: the logging calls below may overwrite it */
            const int err = ompi_socket_errno;
            switch(err) {
            case EINTR:
                continue;
            case EWOULDBLOCK:
                return false;
            case EFAULT:
                /* iov_len and iov_cnt are size_t: print them as unsigned long */
                BTL_ERROR(("writev error (%p, %lu)\n\t%s(%lu)\n",
                    (void*)frag->iov_ptr[0].iov_base,
                    (unsigned long)frag->iov_ptr[0].iov_len,
                    strerror(err),
                    (unsigned long)frag->iov_cnt));
                /* fallthrough: handled like any other fatal error */
            default:
                BTL_ERROR(("writev failed with errno=%d", err));
                mca_btl_tcp_endpoint_close(frag->endpoint);
                return false;
            }
        }
    }

    /* consume the bytes written: skip completed iovecs, trim a partial one */
    num_vecs = frag->iov_cnt;
    for(i=0; i<num_vecs; i++) {
        /* cnt is non-negative here, so the widening cast is safe */
        if((size_t)cnt >= frag->iov_ptr->iov_len) {
            cnt -= frag->iov_ptr->iov_len;
            frag->iov_ptr++;
            frag->iov_idx++;
            frag->iov_cnt--;
        } else {
            frag->iov_ptr->iov_base = (ompi_iov_base_ptr_t)
                (((unsigned char*)frag->iov_ptr->iov_base) + cnt);
            frag->iov_ptr->iov_len -= cnt;
            break;
        }
    }
    return (frag->iov_cnt == 0);
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Read as much of the fragment's pending iovec array as the socket offers.
 *
 * The fragment starts with a single iovec covering the header.  Once the
 * header is complete and its type is MCA_BTL_TCP_HDR_TYPE_SEND, a second
 * iovec is set up for the payload (stored directly behind the fragment
 * structure) and the function returns false so it is called again.
 *
 * @param frag  fragment whose iov_ptr / iov_cnt describe the bytes still
 *              expected; both are advanced past whatever was read
 * @param sd    socket descriptor to read from
 * @return true once the whole fragment has been received; false if more
 *         data is needed, the read would block, the peer closed the
 *         connection, or the read failed (the endpoint is closed in the
 *         last two cases).
 *
 * NOTE(review): hdr.size comes from the peer and is not checked against the
 * space available behind the fragment - confirm callers guarantee it fits.
 */
bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t* frag, int sd)
{
    int cnt = -1;
    size_t i, num_vecs;

    /* non-blocking read, but retry if interrupted */
    while(cnt < 0) {
        cnt = readv(sd, frag->iov_ptr, frag->iov_cnt);
        if(cnt < 0) {
            /* capture errno now: the logging calls below may overwrite it */
            const int err = ompi_socket_errno;
            switch(err) {
            case EINTR:
                continue;
            case EWOULDBLOCK:
                return false;
            case EFAULT:
                /* iov_len and iov_cnt are size_t: print them as unsigned long */
                BTL_ERROR(("readv error (%p, %lu)\n\t%s(%lu)\n",
                    (void*)frag->iov_ptr[0].iov_base,
                    (unsigned long)frag->iov_ptr[0].iov_len,
                    strerror(err),
                    (unsigned long)frag->iov_cnt));
                /* fallthrough: handled like any other fatal error */
            default:
                BTL_ERROR(("readv failed with errno=%d", err));
                mca_btl_tcp_endpoint_close(frag->endpoint);
                return false;
            }
        }
    }

    /*
     * readv() returns 0 both at end-of-stream and when no bytes were
     * requested (zero-length payload).  Only the former means the peer has
     * gone away; without this check a closed connection is never noticed.
     */
    if(cnt == 0) {
        size_t wanted = 0;
        for(i=0; i<frag->iov_cnt; i++) {
            wanted += frag->iov_ptr[i].iov_len;
        }
        if(wanted != 0) {
            mca_btl_tcp_endpoint_close(frag->endpoint);
            return false;
        }
    }

    /* consume the bytes read: skip completed iovecs, trim a partial one */
    num_vecs = frag->iov_cnt;
    for(i=0; i<num_vecs; i++) {
        /* cnt is non-negative here, so the widening cast is safe */
        if((size_t)cnt >= frag->iov_ptr->iov_len) {
            cnt -= frag->iov_ptr->iov_len;
            frag->iov_idx++;
            frag->iov_ptr++;
            frag->iov_cnt--;
        } else {
            frag->iov_ptr->iov_base = (ompi_iov_base_ptr_t)
                (((unsigned char*)frag->iov_ptr->iov_base) + cnt);
            frag->iov_ptr->iov_len -= cnt;
            break;
        }
    }

    /* header just completed: queue the payload iovec described by it */
    if(frag->iov_cnt == 0 && frag->iov_idx == 1) {
        switch(frag->hdr.type) {
        case MCA_BTL_TCP_HDR_TYPE_SEND:
            /* payload lives directly behind the fragment structure */
            frag->iov[1].iov_base = (frag+1);
            frag->iov[1].iov_len = frag->hdr.size;
            frag->segments[0].seg_addr.pval = frag+1;
            frag->segments[0].seg_len = frag->hdr.size;
            frag->iov_cnt++;
            return false;
        default:
            break;
        }
    }
    return (frag->iov_cnt == 0);
}
|
||||||
|
|
157
ompi/mca/btl/tcp/btl_tcp_frag.h
Обычный файл
157
ompi/mca/btl/tcp/btl_tcp_frag.h
Обычный файл
@ -0,0 +1,157 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
* University of Stuttgart. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
* All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MCA_BTL_TCP_FRAG_H
|
||||||
|
#define MCA_BTL_TCP_FRAG_H
|
||||||
|
|
||||||
|
|
||||||
|
#define MCA_BTL_TCP_FRAG_ALIGN (8)
|
||||||
|
#include "ompi_config.h"
|
||||||
|
#include "btl_tcp.h"
|
||||||
|
#include "btl_tcp_hdr.h"
|
||||||
|
|
||||||
|
#if defined(c_plusplus) || defined(__cplusplus)
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_btl_tcp_frag_t);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* TCP fragment derived type.
|
||||||
|
*/
|
||||||
|
struct mca_btl_tcp_frag_t {
|
||||||
|
mca_btl_base_descriptor_t base;
|
||||||
|
mca_btl_base_segment_t segments[2];
|
||||||
|
struct mca_btl_base_endpoint_t *endpoint;
|
||||||
|
struct mca_btl_tcp_module_t* btl;
|
||||||
|
mca_btl_tcp_hdr_t hdr;
|
||||||
|
struct iovec iov[3];
|
||||||
|
struct iovec *iov_ptr;
|
||||||
|
size_t iov_cnt;
|
||||||
|
size_t iov_idx;
|
||||||
|
size_t size;
|
||||||
|
int rc;
|
||||||
|
};
|
||||||
|
typedef struct mca_btl_tcp_frag_t mca_btl_tcp_frag_t;
|
||||||
|
OBJ_CLASS_DECLARATION(mca_btl_tcp_frag_t);
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct mca_btl_tcp_frag_t mca_btl_tcp_frag_eager_t;
|
||||||
|
|
||||||
|
OBJ_CLASS_DECLARATION(mca_btl_tcp_frag_eager_t);
|
||||||
|
|
||||||
|
typedef struct mca_btl_tcp_frag_t mca_btl_tcp_frag_max_t;
|
||||||
|
|
||||||
|
OBJ_CLASS_DECLARATION(mca_btl_tcp_frag_max_t);
|
||||||
|
|
||||||
|
typedef struct mca_btl_tcp_frag_t mca_btl_tcp_frag_user_t;
|
||||||
|
|
||||||
|
OBJ_CLASS_DECLARATION(mca_btl_tcp_frag_user_t);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Macros to allocate/return descriptors from module specific
|
||||||
|
* free list(s).
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Take a fragment from the component's eager free list (waits via OMPI_FREE_LIST_WAIT). */
#define MCA_BTL_TCP_FRAG_ALLOC_EAGER(frag, rc)                                     \
{                                                                                  \
    opal_list_item_t *eager_item;                                                  \
    OMPI_FREE_LIST_WAIT(&mca_btl_tcp_component.tcp_frag_eager, eager_item, rc);    \
    (frag) = (mca_btl_tcp_frag_t*) eager_item;                                     \
}

/* Hand a fragment back to the component's eager free list. */
#define MCA_BTL_TCP_FRAG_RETURN_EAGER(frag)                                        \
{                                                                                  \
    OMPI_FREE_LIST_RETURN(&mca_btl_tcp_component.tcp_frag_eager,                   \
                          (opal_list_item_t*)(frag));                              \
}

/* Take a fragment from the component's max-send-size free list. */
#define MCA_BTL_TCP_FRAG_ALLOC_MAX(frag, rc)                                       \
{                                                                                  \
    opal_list_item_t *max_item;                                                    \
    OMPI_FREE_LIST_WAIT(&mca_btl_tcp_component.tcp_frag_max, max_item, rc);        \
    (frag) = (mca_btl_tcp_frag_t*) max_item;                                       \
}

/* Hand a fragment back to the component's max-send-size free list. */
#define MCA_BTL_TCP_FRAG_RETURN_MAX(frag)                                          \
{                                                                                  \
    OMPI_FREE_LIST_RETURN(&mca_btl_tcp_component.tcp_frag_max,                     \
                          (opal_list_item_t*)(frag));                              \
}

/* Take a fragment from the component's user free list. */
#define MCA_BTL_TCP_FRAG_ALLOC_USER(frag, rc)                                      \
{                                                                                  \
    opal_list_item_t *user_item;                                                   \
    OMPI_FREE_LIST_WAIT(&mca_btl_tcp_component.tcp_frag_user, user_item, rc);      \
    (frag) = (mca_btl_tcp_frag_t*) user_item;                                      \
}

/* Hand a fragment back to the component's user free list. */
#define MCA_BTL_TCP_FRAG_RETURN_USER(frag)                                         \
{                                                                                  \
    OMPI_FREE_LIST_RETURN(&mca_btl_tcp_component.tcp_frag_user,                    \
                          (opal_list_item_t*)(frag));                              \
}
|
||||||
|
|
||||||
|
/*
 * Prepare a fragment for sending to endpoint `ep`.
 *
 * iov[0] is pointed at the header; one further iovec is added for each of
 * the base.des_src_cnt source segments, and hdr.size is set to the sum of
 * their lengths.
 *
 * The second parameter must not be called `endpoint`: the preprocessor
 * would then also rewrite the member name in `frag->endpoint`, so the macro
 * would only compile when the argument happened to be spelled `endpoint`.
 * Arguments are parenthesised so arbitrary expressions can be passed.
 */
#define MCA_BTL_TCP_FRAG_INIT_SRC(frag,ep)                                         \
do {                                                                               \
    size_t seg_idx;                                                                \
    (frag)->rc = 0;                                                                \
    (frag)->endpoint = (ep);                                                       \
    (frag)->hdr.size = 0;                                                          \
    (frag)->iov_idx = 0;                                                           \
    (frag)->iov_cnt = 1;                                                           \
    (frag)->iov_ptr = (frag)->iov;                                                 \
    (frag)->iov[0].iov_base = &(frag)->hdr;                                        \
    (frag)->iov[0].iov_len = sizeof((frag)->hdr);                                  \
    for(seg_idx=0; seg_idx<(frag)->base.des_src_cnt; seg_idx++) {                  \
        (frag)->hdr.size += (frag)->segments[seg_idx].seg_len;                     \
        (frag)->iov[seg_idx+1].iov_len = (frag)->segments[seg_idx].seg_len;        \
        (frag)->iov[seg_idx+1].iov_base = (frag)->segments[seg_idx].seg_addr.pval; \
        (frag)->iov_cnt++;                                                         \
    }                                                                              \
} while(0)
|
||||||
|
|
||||||
|
/*
 * Prepare a fragment for receiving from endpoint `ep`.
 *
 * A single iovec covering the header is queued, and the descriptor is set
 * up with no source segments and one destination segment backed by
 * frag->segments.
 *
 * The source side is now cleared completely (des_src = NULL together with
 * des_src_cnt = 0).  The original stored 0 to des_dst_cnt at that point,
 * which was immediately overwritten and left des_src_cnt stale.
 * Arguments are parenthesised so arbitrary expressions can be passed.
 */
#define MCA_BTL_TCP_FRAG_INIT_DST(frag,ep)                                         \
do {                                                                               \
    (frag)->rc = 0;                                                                \
    (frag)->btl = (ep)->endpoint_btl;                                              \
    (frag)->endpoint = (ep);                                                       \
    (frag)->iov[0].iov_len = sizeof((frag)->hdr);                                  \
    (frag)->iov[0].iov_base = &(frag)->hdr;                                        \
    (frag)->iov_cnt = 1;                                                           \
    (frag)->iov_idx = 0;                                                           \
    (frag)->iov_ptr = (frag)->iov;                                                 \
    (frag)->base.des_src = NULL;                                                   \
    (frag)->base.des_src_cnt = 0;                                                  \
    (frag)->base.des_dst = (frag)->segments;                                       \
    (frag)->base.des_dst_cnt = 1;                                                  \
} while(0)
|
||||||
|
|
||||||
|
|
||||||
|
bool mca_btl_tcp_frag_send(mca_btl_tcp_frag_t*, int sd);
|
||||||
|
bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t*, int sd);
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(c_plusplus) || defined(__cplusplus)
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif
|
48
ompi/mca/btl/tcp/btl_tcp_hdr.h
Обычный файл
48
ompi/mca/btl/tcp/btl_tcp_hdr.h
Обычный файл
@ -0,0 +1,48 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
* University of Stuttgart. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
* All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MCA_BTL_TCP_HDR_H
|
||||||
|
#define MCA_BTL_TCP_HDR_H
|
||||||
|
|
||||||
|
|
||||||
|
#include "ompi_config.h"
|
||||||
|
#include "mca/btl/base/base.h"
|
||||||
|
#include "btl_tcp.h"
|
||||||
|
|
||||||
|
#if defined(c_plusplus) || defined(__cplusplus)
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* TCP header.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define MCA_BTL_TCP_HDR_TYPE_SEND 1
|
||||||
|
#define MCA_BTL_TCP_HDR_TYPE_PUT 2
|
||||||
|
#define MCA_BTL_TCP_HDR_TYPE_GET 3
|
||||||
|
|
||||||
|
|
||||||
|
struct mca_btl_tcp_hdr_t {
|
||||||
|
mca_btl_base_header_t base;
|
||||||
|
uint8_t type;
|
||||||
|
uint64_t size;
|
||||||
|
};
|
||||||
|
typedef struct mca_btl_tcp_hdr_t mca_btl_tcp_hdr_t;
|
||||||
|
|
||||||
|
#if defined(c_plusplus) || defined(__cplusplus)
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif
|
233
ompi/mca/btl/tcp/btl_tcp_proc.c
Обычный файл
233
ompi/mca/btl/tcp/btl_tcp_proc.c
Обычный файл
@ -0,0 +1,233 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
* University of Stuttgart. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
* All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ompi_config.h"
|
||||||
|
|
||||||
|
#include "orte/class/orte_proc_table.h"
|
||||||
|
#include "ompi/mca/btl/base/btl_base_error.h"
|
||||||
|
#include "ompi/mca/pml/base/pml_base_module_exchange.h"
|
||||||
|
|
||||||
|
#include "btl_tcp.h"
|
||||||
|
#include "btl_tcp_proc.h"
|
||||||
|
|
||||||
|
static void mca_btl_tcp_proc_construct(mca_btl_tcp_proc_t* proc);
|
||||||
|
static void mca_btl_tcp_proc_destruct(mca_btl_tcp_proc_t* proc);
|
||||||
|
|
||||||
|
|
||||||
|
OBJ_CLASS_INSTANCE(
|
||||||
|
mca_btl_tcp_proc_t,
|
||||||
|
opal_list_item_t,
|
||||||
|
mca_btl_tcp_proc_construct,
|
||||||
|
mca_btl_tcp_proc_destruct);
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Constructor: start with no associated ompi proc, no peer addresses and no
 * endpoints, and set up the per-proc lock.
 */
void mca_btl_tcp_proc_construct(mca_btl_tcp_proc_t* proc)
{
    OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t);

    proc->proc_ompi = NULL;

    /* addresses exported by the peer */
    proc->proc_addr_count = 0;
    proc->proc_addrs = NULL;

    /* endpoints created to reach the peer */
    proc->proc_endpoint_count = 0;
    proc->proc_endpoints = NULL;
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Cleanup ib proc instance
|
||||||
|
*/
|
||||||
|
|
||||||
|
void mca_btl_tcp_proc_destruct(mca_btl_tcp_proc_t* proc)
|
||||||
|
{
|
||||||
|
/* remove from list of all proc instances */
|
||||||
|
OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
|
||||||
|
orte_hash_table_remove_proc(&mca_btl_tcp_component.tcp_procs, &proc->proc_name);
|
||||||
|
OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
|
||||||
|
|
||||||
|
/* release resources */
|
||||||
|
if(NULL != proc->proc_endpoints) {
|
||||||
|
free(proc->proc_endpoints);
|
||||||
|
OBJ_DESTRUCT(&proc->proc_lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Create a TCP process structure. There is a one-to-one correspondence
|
||||||
|
* between a ompi_proc_t and a mca_btl_tcp_proc_t instance. We cache
|
||||||
|
* additional data (specifically the list of mca_btl_tcp_endpoint_t instances,
|
||||||
|
* and published addresses) associated w/ a given destination on this
|
||||||
|
* datastructure.
|
||||||
|
*/
|
||||||
|
|
||||||
|
mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(ompi_proc_t* ompi_proc)
|
||||||
|
{
|
||||||
|
int rc;
|
||||||
|
size_t size;
|
||||||
|
mca_btl_tcp_proc_t* btl_proc;
|
||||||
|
|
||||||
|
OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
|
||||||
|
btl_proc = (mca_btl_tcp_proc_t*)orte_hash_table_get_proc(
|
||||||
|
&mca_btl_tcp_component.tcp_procs, &ompi_proc->proc_name);
|
||||||
|
if(NULL != btl_proc) {
|
||||||
|
OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
|
||||||
|
return btl_proc;
|
||||||
|
}
|
||||||
|
|
||||||
|
btl_proc = OBJ_NEW(mca_btl_tcp_proc_t);
|
||||||
|
if(NULL == btl_proc)
|
||||||
|
return NULL;
|
||||||
|
btl_proc->proc_ompi = ompi_proc;
|
||||||
|
btl_proc->proc_name = ompi_proc->proc_name;
|
||||||
|
|
||||||
|
/* add to hash table of all proc instance */
|
||||||
|
orte_hash_table_set_proc(
|
||||||
|
&mca_btl_tcp_component.tcp_procs,
|
||||||
|
&btl_proc->proc_name,
|
||||||
|
btl_proc);
|
||||||
|
OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
|
||||||
|
|
||||||
|
/* lookup tcp parameters exported by this proc */
|
||||||
|
rc = mca_base_modex_recv( &mca_btl_tcp_component.super.btl_version,
|
||||||
|
ompi_proc,
|
||||||
|
(void**)&btl_proc->proc_addrs,
|
||||||
|
&size);
|
||||||
|
if(rc != OMPI_SUCCESS) {
|
||||||
|
BTL_ERROR(("mca_base_modex_recv: failed with return value=%d", rc));
|
||||||
|
OBJ_RELEASE(btl_proc);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if(0 != (size % sizeof(mca_btl_tcp_addr_t))) {
|
||||||
|
BTL_ERROR(("mca_base_modex_recv: invalid size %d\n", size));
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
btl_proc->proc_addr_count = size / sizeof(mca_btl_tcp_addr_t);
|
||||||
|
|
||||||
|
/* allocate space for endpoint array - one for each exported address */
|
||||||
|
btl_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
|
||||||
|
malloc(btl_proc->proc_addr_count * sizeof(mca_btl_base_endpoint_t*));
|
||||||
|
if(NULL == btl_proc->proc_endpoints) {
|
||||||
|
OBJ_RELEASE(btl_proc);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if(NULL == mca_btl_tcp_component.tcp_local && ompi_proc == ompi_proc_local())
|
||||||
|
mca_btl_tcp_component.tcp_local = btl_proc;
|
||||||
|
return btl_proc;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note that this routine must be called with the lock on the process
|
||||||
|
* already held. Insert a btl instance into the proc array and assign
|
||||||
|
* it an address.
|
||||||
|
*/
|
||||||
|
int mca_btl_tcp_proc_insert(
|
||||||
|
mca_btl_tcp_proc_t* btl_proc,
|
||||||
|
mca_btl_base_endpoint_t* btl_endpoint)
|
||||||
|
{
|
||||||
|
struct mca_btl_tcp_module_t *btl_tcp = btl_endpoint->endpoint_btl;
|
||||||
|
size_t i;
|
||||||
|
unsigned long net1;
|
||||||
|
|
||||||
|
/* insert into endpoint array */
|
||||||
|
btl_endpoint->endpoint_proc = btl_proc;
|
||||||
|
btl_proc->proc_endpoints[btl_proc->proc_endpoint_count++] = btl_endpoint;
|
||||||
|
|
||||||
|
net1 = btl_tcp->tcp_ifaddr.sin_addr.s_addr & btl_tcp->tcp_ifmask.sin_addr.s_addr;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Look through the proc instance for an address that is on the
|
||||||
|
* directly attached network. If we don't find one, pick the first
|
||||||
|
* unused address.
|
||||||
|
*/
|
||||||
|
for(i=0; i<btl_proc->proc_addr_count; i++) {
|
||||||
|
mca_btl_tcp_addr_t* endpoint_addr = btl_proc->proc_addrs + i;
|
||||||
|
unsigned long net2 = endpoint_addr->addr_inet.s_addr & btl_tcp->tcp_ifmask.sin_addr.s_addr;
|
||||||
|
if(endpoint_addr->addr_inuse != 0)
|
||||||
|
continue;
|
||||||
|
if(net1 == net2) {
|
||||||
|
btl_endpoint->endpoint_addr = endpoint_addr;
|
||||||
|
break;
|
||||||
|
} else if(btl_endpoint->endpoint_addr != 0)
|
||||||
|
btl_endpoint->endpoint_addr = endpoint_addr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Make sure there is a common interface */
|
||||||
|
if( NULL != btl_endpoint->endpoint_addr ) {
|
||||||
|
btl_endpoint->endpoint_addr->addr_inuse++;
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
return OMPI_ERR_UNREACH;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Remove an endpoint from the proc array and indicate the address is
|
||||||
|
* no longer in use.
|
||||||
|
*/
|
||||||
|
|
||||||
|
int mca_btl_tcp_proc_remove(mca_btl_tcp_proc_t* btl_proc, mca_btl_base_endpoint_t* btl_endpoint)
|
||||||
|
{
|
||||||
|
size_t i;
|
||||||
|
OPAL_THREAD_LOCK(&btl_proc->proc_lock);
|
||||||
|
for(i=0; i<btl_proc->proc_endpoint_count; i++) {
|
||||||
|
if(btl_proc->proc_endpoints[i] == btl_endpoint) {
|
||||||
|
memmove(btl_proc->proc_endpoints+i, btl_proc->proc_endpoints+i+1,
|
||||||
|
(btl_proc->proc_endpoint_count-i-1)*sizeof(mca_btl_base_endpoint_t*));
|
||||||
|
if(--btl_proc->proc_endpoint_count == 0) {
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_proc->proc_lock);
|
||||||
|
OBJ_RELEASE(btl_proc);
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
btl_endpoint->endpoint_addr->addr_inuse--;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_proc->proc_lock);
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Look for an existing TCP process instance based on the globally unique
|
||||||
|
* process identifier.
|
||||||
|
*/
|
||||||
|
mca_btl_tcp_proc_t* mca_btl_tcp_proc_lookup(const orte_process_name_t *name)
|
||||||
|
{
|
||||||
|
mca_btl_tcp_proc_t* proc;
|
||||||
|
OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
|
||||||
|
proc = (mca_btl_tcp_proc_t*)orte_hash_table_get_proc(
|
||||||
|
&mca_btl_tcp_component.tcp_procs, name);
|
||||||
|
OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
|
||||||
|
return proc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* loop through all available PTLs for one matching the source address
|
||||||
|
* of the request.
|
||||||
|
*/
|
||||||
|
bool mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t* btl_proc, struct sockaddr_in* addr, int sd)
|
||||||
|
{
|
||||||
|
size_t i;
|
||||||
|
OPAL_THREAD_LOCK(&btl_proc->proc_lock);
|
||||||
|
for(i=0; i<btl_proc->proc_endpoint_count; i++) {
|
||||||
|
mca_btl_base_endpoint_t* btl_endpoint = btl_proc->proc_endpoints[i];
|
||||||
|
if(mca_btl_tcp_endpoint_accept(btl_endpoint, addr, sd)) {
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_proc->proc_lock);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_proc->proc_lock);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
85
ompi/mca/btl/tcp/btl_tcp_proc.h
Обычный файл
85
ompi/mca/btl/tcp/btl_tcp_proc.h
Обычный файл
@ -0,0 +1,85 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
* University of Stuttgart. All rights reserved.
|
||||||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
* All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MCA_BTL_TCP_PROC_H
|
||||||
|
#define MCA_BTL_TCP_PROC_H
|
||||||
|
|
||||||
|
#include "opal/class/opal_object.h"
|
||||||
|
#include "orte/mca/ns/ns.h"
|
||||||
|
#include "ompi/proc/proc.h"
|
||||||
|
#include "btl_tcp.h"
|
||||||
|
#include "btl_tcp_addr.h"
|
||||||
|
#include "btl_tcp_endpoint.h"
|
||||||
|
|
||||||
|
#if defined(c_plusplus) || defined(__cplusplus)
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
OBJ_CLASS_DECLARATION(mca_btl_tcp_proc_t);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents the state of a remote process and the set of addresses
|
||||||
|
* that it exports. Also cache an instance of mca_btl_base_endpoint_t for
|
||||||
|
* each
|
||||||
|
* BTL instance that attempts to open a connection to the process.
|
||||||
|
*/
|
||||||
|
struct mca_btl_tcp_proc_t {
|
||||||
|
opal_list_item_t super;
|
||||||
|
/**< allow proc to be placed on a list */
|
||||||
|
|
||||||
|
ompi_proc_t *proc_ompi;
|
||||||
|
/**< pointer to corresponding ompi_proc_t */
|
||||||
|
|
||||||
|
orte_process_name_t proc_name;
|
||||||
|
/**< globally unique identifier for the process */
|
||||||
|
|
||||||
|
struct mca_btl_tcp_addr_t* proc_addrs;
|
||||||
|
/**< array of addresses exported by peer */
|
||||||
|
|
||||||
|
size_t proc_addr_count;
|
||||||
|
/**< number of addresses published by endpoint */
|
||||||
|
|
||||||
|
struct mca_btl_base_endpoint_t **proc_endpoints;
|
||||||
|
/**< array of endpoints that have been created to access this proc */
|
||||||
|
|
||||||
|
size_t proc_endpoint_count;
|
||||||
|
/**< number of endpoints */
|
||||||
|
|
||||||
|
opal_mutex_t proc_lock;
|
||||||
|
/**< lock to protect against concurrent access to proc state */
|
||||||
|
};
|
||||||
|
typedef struct mca_btl_tcp_proc_t mca_btl_tcp_proc_t;
|
||||||
|
|
||||||
|
mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(ompi_proc_t* ompi_proc);
|
||||||
|
mca_btl_tcp_proc_t* mca_btl_tcp_proc_lookup(const orte_process_name_t* name);
|
||||||
|
int mca_btl_tcp_proc_insert(mca_btl_tcp_proc_t*, mca_btl_base_endpoint_t*);
|
||||||
|
int mca_btl_tcp_proc_remove(mca_btl_tcp_proc_t*, mca_btl_base_endpoint_t*);
|
||||||
|
bool mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t*, struct sockaddr_in*, int);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Inlined function to return local TCP proc instance.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static inline mca_btl_tcp_proc_t* mca_btl_tcp_proc_local(void)
|
||||||
|
{
|
||||||
|
if(NULL == mca_btl_tcp_component.tcp_local)
|
||||||
|
mca_btl_tcp_component.tcp_local = mca_btl_tcp_proc_create(ompi_proc_local());
|
||||||
|
return mca_btl_tcp_component.tcp_local;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(c_plusplus) || defined(__cplusplus)
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif
|
22
ompi/mca/btl/tcp/configure.params
Обычный файл
22
ompi/mca/btl/tcp/configure.params
Обычный файл
@ -0,0 +1,22 @@
|
|||||||
|
# -*- shell-script -*-
|
||||||
|
#
|
||||||
|
# Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||||
|
# All rights reserved.
|
||||||
|
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
|
# All rights reserved.
|
||||||
|
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
|
# University of Stuttgart. All rights reserved.
|
||||||
|
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
|
# All rights reserved.
|
||||||
|
# $COPYRIGHT$
|
||||||
|
#
|
||||||
|
# Additional copyrights may follow
|
||||||
|
#
|
||||||
|
# $HEADER$
|
||||||
|
#
|
||||||
|
|
||||||
|
# Specific to this module
|
||||||
|
|
||||||
|
PARAM_INIT_FILE=btl_tcp.c
|
||||||
|
PARAM_CONFIG_HEADER_FILE="tcp_config.h"
|
||||||
|
PARAM_CONFIG_FILES="Makefile"
|
Загрузка…
x
Ссылка в новой задаче
Block a user