1
1

- first cut at tcp btl (working but not optimal)

- reworked btl error logging macros

This commit was SVN r6701.
Этот коммит содержится в:
Tim Woodall 2005-08-02 13:20:50 +00:00
родитель ed1022afd3
Коммит 2214f0502d
26 изменённых файлов: 3191 добавлений и 217 удалений

Просмотреть файл

@ -30,6 +30,7 @@ headers = \
libmca_btl_base_la_SOURCES = \ libmca_btl_base_la_SOURCES = \
$(headers) \ $(headers) \
btl_base_close.c \ btl_base_close.c \
btl_base_error.c \
btl_base_open.c \ btl_base_open.c \
btl_base_select.c btl_base_select.c

32
ompi/mca/btl/base/btl_base_error.c Обычный файл
Просмотреть файл

@ -0,0 +1,32 @@
/* Implementation of the BTL base error/output helpers declared in
 * btl_base_error.h (used by the BTL_ERROR/BTL_OUTPUT/BTL_DEBUG macros). */
#include "btl_base_error.h"
#include <stdarg.h>
#if OMPI_ENABLE_DEBUG
/* Runtime switch for BTL_DEBUG output; enabled by default in debug builds.
 * Declared extern in btl_base_error.h. */
int mca_btl_base_debug = 1;
#endif
/*
 * Print a formatted diagnostic message to stderr.
 *
 * @param fmt  printf-style format string
 * @param ...  arguments consumed according to fmt
 * @return the value returned by vfprintf: the number of characters
 *         written, or a negative value on output error
 */
int mca_btl_base_err(const char* fmt, ...)
{
    va_list ap;
    int nwritten;

    va_start(ap, fmt);
    nwritten = vfprintf(stderr, fmt, ap);
    va_end(ap);

    return nwritten;
}
/*
 * Print a formatted informational message to stdout.
 *
 * @param fmt  printf-style format string
 * @param ...  arguments consumed according to fmt
 * @return the value returned by vfprintf: the number of characters
 *         written, or a negative value on output error
 */
int mca_btl_base_out(const char* fmt, ...)
{
    va_list ap;
    int nwritten;

    va_start(ap, fmt);
    nwritten = vfprintf(stdout, fmt, ap);
    va_end(ap);

    return nwritten;
}

Просмотреть файл

@ -17,67 +17,49 @@
#ifndef MCA_BTL_BASE_ERROR_H #ifndef MCA_BTL_BASE_ERROR_H
#define MCA_BTL_BASE_ERROR_H #define MCA_BTL_BASE_ERROR_H
#include "ompi_config.h"
#include <stdio.h>
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) extern int mca_btl_base_debug;
# define BTL_ERROR(fmt, ...) { \
opal_output(0, "[%s:%d:%s] my_name: [%lu,%lu,%lu] " fmt "\n", __FILE__, __LINE__, __func__, \
ORTE_NAME_ARGS(orte_process_info.my_name), __VA_ARGS__); \
}
#else
# if defined(__GNUC__) && !defined(__STDC__)
#define BTL_ERROR(fmt, args...) { \
opal_output(0, "[%s:%d:%s] my_name: [%lu,%lu,%lu]" fmt "\n", __FILE__, __LINE__, __func__,\
ORTE_NAME_ARGS(orte_process_info.my_name), ##args); \
}
#else
static inline void BTL_ERROR(char *fmt, ... )
{
va_list list;
va_start(list, fmt);
fprintf(stderr,"[%s:%d:%s] my_name: [%lu,%lu,%lu]",
__FILE__, __LINE__, __func__,
ORTE_NAME_ARGS(orte_process_info.my_name));
vfprintf(stderr, fmt, list); extern int mca_btl_base_err(const char*, ...);
fprintf(stderr, "\n"); extern int mca_btl_base_out(const char*, ...);
va_end(list);
}
#endif #define BTL_OUTPUT(args) \
#endif do { \
#if 0 mca_btl_base_out("[%lu,%lu,%lu][%s:%d:%s] ", \
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)L ORTE_NAME_ARGS(orte_process_info.my_name), \
# define BTL_DEBUG_OUT(fmt, ...) { \ __FILE__, __LINE__, __func__); \
opal_output(0, "[%s:%d:%s] " fmt "\n", __FILE__, __LINE__, __func__, __VA_ARGS__); \ mca_btl_base_out args; \
} mca_btl_base_out("\n"); \
#else } while(0);
# if defined(__GNUC__) && !defined(__STDC__)
#define BTL_DEBUG_OUT(fmt, args...) { \
opal_output(0, "[%s:%d:%s] " fmt "\n", __FILE__, __LINE__, __func__, ##args); \ #define BTL_ERROR(args) \
} do { \
#else mca_btl_base_err("[%lu,%lu,%lu][%s:%d:%s] ", \
static inline void BTL_DEBUG_OUT(char *fmt, ... ) ORTE_NAME_ARGS(orte_process_info.my_name), \
{ __FILE__, __LINE__, __func__); \
va_list list; mca_btl_base_err args; \
va_start(list, fmt); mca_btl_base_out("\n"); \
fprintf(stderr, "[%s:%d:%s]", __FILE__, __LINE__, __func__, list); } while(0);
vfprintf(stderr, fmt, list);
vfpritnf(stderr, "\n");
va_end(list); #if OMPI_ENABLE_DEBUG
} #define BTL_DEBUG(args) \
#endif do { \
#endif if(mca_btl_base_debug) { \
mca_btl_base_err("[%lu,%lu,%lu][%s:%d:%s] ", \
ORTE_NAME_ARGS(orte_process_info.my_name), \
__FILE__, __LINE__, __func__); \
mca_btl_base_err args; \
mca_btl_base_out("\n"); \
} \
} while(0);
#else #else
#if defined(ACCEPT_C99) && __STDC_VERSION__ >= 199901L #define BTL_DEBUG(args)
# define BTL_DEBUG_OUT(fmt, ...)
#else
# if defined(__GNUC__) && !defined(__STDC__)
#define BTL_DEBUG_OUT(fmt, args...)
#else
static inline void BTL_DEBUG_OUT(char *fmt, ... )
{
}
#endif
#endif
#endif #endif
#endif #endif

Просмотреть файл

@ -25,6 +25,9 @@
#include "mca/btl/btl.h" #include "mca/btl/btl.h"
#include "mca/btl/base/base.h" #include "mca/btl/base/base.h"
int mca_btl_base_debug;
/* /*
* mca_btl_base_descriptor_t * mca_btl_base_descriptor_t
*/ */

Просмотреть файл

@ -127,7 +127,7 @@ int mca_btl_mvapi_del_procs(struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t ** peers) struct mca_btl_base_endpoint_t ** peers)
{ {
/* Stub */ /* Stub */
BTL_DEBUG_OUT("Stub\n"); BTL_DEBUG(("Stub\n"));
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -209,7 +209,7 @@ int mca_btl_mvapi_free(
} else if(frag->size == mca_btl_mvapi_component.eager_limit){ } else if(frag->size == mca_btl_mvapi_component.eager_limit){
MCA_BTL_IB_FRAG_RETURN_EAGER(btl, frag); MCA_BTL_IB_FRAG_RETURN_EAGER(btl, frag);
} else { } else {
BTL_ERROR("invalid descriptor"); BTL_ERROR(("invalid descriptor"));
} }
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -288,13 +288,13 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_src(
rc = mca_mpool_base_remove((void*) vapi_reg->base_reg.base); rc = mca_mpool_base_remove((void*) vapi_reg->base_reg.base);
if(OMPI_SUCCESS != rc) { if(OMPI_SUCCESS != rc) {
BTL_ERROR("error removing memory region from memory pool tree"); BTL_ERROR(("error removing memory region from memory pool tree"));
return NULL; return NULL;
} }
if(is_leave_pinned) { if(is_leave_pinned) {
if(NULL == opal_list_remove_item(&mvapi_btl->reg_mru_list, (opal_list_item_t*) vapi_reg)){ if(NULL == opal_list_remove_item(&mvapi_btl->reg_mru_list, (opal_list_item_t*) vapi_reg)){
BTL_ERROR("error removing item from reg_mru_list"); BTL_ERROR(("error removing item from reg_mru_list"));
return NULL; return NULL;
} }
} }
@ -315,7 +315,7 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_src(
if(rc != OMPI_SUCCESS) { if(rc != OMPI_SUCCESS) {
BTL_ERROR("error inserting memory region into memory pool tree"); BTL_ERROR(("error inserting memory region into memory pool tree"));
return NULL; return NULL;
} }
@ -329,7 +329,7 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_src(
else if(is_leave_pinned) { else if(is_leave_pinned) {
/* the current memory region is large enough and we should leave the memory pinned */ /* the current memory region is large enough and we should leave the memory pinned */
if(NULL == opal_list_remove_item(&mvapi_btl->reg_mru_list, (opal_list_item_t*) vapi_reg)) { if(NULL == opal_list_remove_item(&mvapi_btl->reg_mru_list, (opal_list_item_t*) vapi_reg)) {
BTL_ERROR("error removing item from reg_mru_list"); BTL_ERROR(("error removing item from reg_mru_list"));
return NULL; return NULL;
} }
@ -391,7 +391,7 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_src(
opal_list_remove_first(&mvapi_btl->reg_mru_list); opal_list_remove_first(&mvapi_btl->reg_mru_list);
if( NULL == old_reg) { if( NULL == old_reg) {
BTL_ERROR("error removing item from reg_mru_list"); BTL_ERROR(("error removing item from reg_mru_list"));
return NULL; return NULL;
} }
@ -399,7 +399,7 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_src(
rc = mca_mpool_base_remove((void*) old_reg->base_reg.base); rc = mca_mpool_base_remove((void*) old_reg->base_reg.base);
if(OMPI_SUCCESS != rc) { if(OMPI_SUCCESS != rc) {
BTL_ERROR("error removing memory region from memory pool tree"); BTL_ERROR(("error removing memory region from memory pool tree"));
return NULL; return NULL;
} }
@ -570,7 +570,7 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_dst(
rc = mca_mpool_base_remove((void*) vapi_reg->base_reg.base); rc = mca_mpool_base_remove((void*) vapi_reg->base_reg.base);
if(OMPI_SUCCESS != rc) { if(OMPI_SUCCESS != rc) {
BTL_ERROR("error removing memory region from memory pool tree"); BTL_ERROR(("error removing memory region from memory pool tree"));
return NULL; return NULL;
} }
@ -580,7 +580,7 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_dst(
*/ */
if(NULL == opal_list_remove_item(&mvapi_btl->reg_mru_list, (opal_list_item_t*) vapi_reg)) { if(NULL == opal_list_remove_item(&mvapi_btl->reg_mru_list, (opal_list_item_t*) vapi_reg)) {
BTL_ERROR("error removing item from reg_mru_list"); BTL_ERROR(("error removing item from reg_mru_list"));
return NULL; return NULL;
} }
} }
@ -599,7 +599,7 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_dst(
(mca_mpool_base_registration_t*) vapi_reg); (mca_mpool_base_registration_t*) vapi_reg);
if(OMPI_SUCCESS != rc) { if(OMPI_SUCCESS != rc) {
BTL_ERROR("error inserting memory region into memory pool tree"); BTL_ERROR(("error inserting memory region into memory pool tree"));
return NULL; return NULL;
} }
OBJ_RETAIN(vapi_reg); OBJ_RETAIN(vapi_reg);
@ -614,7 +614,7 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_dst(
else if(is_leave_pinned){ else if(is_leave_pinned){
/* the current memory region is large enough and we should leave the memory pinned */ /* the current memory region is large enough and we should leave the memory pinned */
if(NULL == opal_list_remove_item(&mvapi_btl->reg_mru_list, (opal_list_item_t*) vapi_reg)) { if(NULL == opal_list_remove_item(&mvapi_btl->reg_mru_list, (opal_list_item_t*) vapi_reg)) {
BTL_ERROR("error removing item from reg_mru_list"); BTL_ERROR(("error removing item from reg_mru_list"));
return NULL; return NULL;
} }
opal_list_append(&mvapi_btl->reg_mru_list, (opal_list_item_t*) vapi_reg); opal_list_append(&mvapi_btl->reg_mru_list, (opal_list_item_t*) vapi_reg);
@ -639,13 +639,13 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_dst(
opal_list_remove_first(&mvapi_btl->reg_mru_list); opal_list_remove_first(&mvapi_btl->reg_mru_list);
if( NULL == old_reg) { if( NULL == old_reg) {
BTL_ERROR("error removing item from reg_mru_list"); BTL_ERROR(("error removing item from reg_mru_list"));
return NULL; return NULL;
} }
rc = mca_mpool_base_remove((void*) old_reg->base_reg.base); rc = mca_mpool_base_remove((void*) old_reg->base_reg.base);
if(OMPI_SUCCESS !=rc ) { if(OMPI_SUCCESS !=rc ) {
BTL_ERROR("error removing memory region from memory pool tree"); BTL_ERROR(("error removing memory region from memory pool tree"));
return NULL; return NULL;
} }
@ -665,7 +665,7 @@ mca_btl_base_descriptor_t* mca_btl_mvapi_prepare_dst(
(void*) (&mvapi_btl->super), (void*) (&mvapi_btl->super),
(mca_mpool_base_registration_t*) vapi_reg); (mca_mpool_base_registration_t*) vapi_reg);
if(OMPI_SUCCESS != rc){ if(OMPI_SUCCESS != rc){
BTL_ERROR("error inserting memory region into memory pool"); BTL_ERROR(("error inserting memory region into memory pool"));
return NULL; return NULL;
} }
@ -822,8 +822,7 @@ static void async_event_handler(VAPI_hca_hndl_t hca_hndl,
case VAPI_SEND_QUEUE_DRAINED: case VAPI_SEND_QUEUE_DRAINED:
case VAPI_PORT_ACTIVE: case VAPI_PORT_ACTIVE:
{ {
BTL_DEBUG_OUT("Got an asynchronous event: %s\n", BTL_DEBUG(("Got an asynchronous event: %s\n", VAPI_event_record_sym(event_p->type)));
VAPI_event_record_sym(event_p->type));
break; break;
} }
case VAPI_CQ_ERROR: case VAPI_CQ_ERROR:
@ -835,14 +834,14 @@ static void async_event_handler(VAPI_hca_hndl_t hca_hndl,
case VAPI_LOCAL_CATASTROPHIC_ERROR: case VAPI_LOCAL_CATASTROPHIC_ERROR:
case VAPI_PORT_ERROR: case VAPI_PORT_ERROR:
{ {
BTL_ERROR("Got an asynchronous event: %s (%s)", BTL_ERROR(("Got an asynchronous event: %s (%s)",
VAPI_event_record_sym(event_p->type), VAPI_event_record_sym(event_p->type),
VAPI_event_syndrome_sym(event_p->syndrome)); VAPI_event_syndrome_sym(event_p->syndrome)));
break; break;
} }
default: default:
BTL_ERROR("Warning!! Got an undefined " BTL_ERROR(("Warning!! Got an undefined "
"asynchronous event"); "asynchronous event"));
} }
} }
@ -863,7 +862,7 @@ int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t *mvapi_btl)
ret = VAPI_alloc_pd(mvapi_btl->nic, &mvapi_btl->ptag); ret = VAPI_alloc_pd(mvapi_btl->nic, &mvapi_btl->ptag);
if(ret != VAPI_OK) { if(ret != VAPI_OK) {
BTL_ERROR("error in VAPI_alloc_pd: %s", VAPI_strerror(ret)); BTL_ERROR(("error in VAPI_alloc_pd: %s", VAPI_strerror(ret)));
return OMPI_ERROR; return OMPI_ERROR;
} }
@ -880,7 +879,7 @@ int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t *mvapi_btl)
&mvapi_btl->srq_hndl_high, &mvapi_btl->srq_hndl_high,
&srq_attr_out); &srq_attr_out);
if(ret != VAPI_OK) { if(ret != VAPI_OK) {
BTL_ERROR("error in VAPI_create_srq: %s", VAPI_strerror(ret)); BTL_ERROR(("error in VAPI_create_srq: %s", VAPI_strerror(ret)));
return OMPI_ERROR; return OMPI_ERROR;
} }
ret = VAPI_create_srq(mvapi_btl->nic, ret = VAPI_create_srq(mvapi_btl->nic,
@ -888,7 +887,7 @@ int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t *mvapi_btl)
&mvapi_btl->srq_hndl_low, &mvapi_btl->srq_hndl_low,
&srq_attr_out); &srq_attr_out);
if(ret != VAPI_OK) { if(ret != VAPI_OK) {
BTL_ERROR("error in VAPI_create_srq: %s", VAPI_strerror(ret)); BTL_ERROR(("error in VAPI_create_srq: %s", VAPI_strerror(ret)));
return OMPI_ERROR; return OMPI_ERROR;
} }
@ -901,7 +900,7 @@ int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t *mvapi_btl)
if( VAPI_OK != ret) { if( VAPI_OK != ret) {
BTL_ERROR("error in VAPI_create_cq: %s", VAPI_strerror(ret)); BTL_ERROR(("error in VAPI_create_cq: %s", VAPI_strerror(ret)));
return OMPI_ERROR; return OMPI_ERROR;
} }
@ -910,13 +909,13 @@ int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t *mvapi_btl)
if( VAPI_OK != ret) { if( VAPI_OK != ret) {
BTL_ERROR("error in VAPI_create_cq: %s", VAPI_strerror(ret)); BTL_ERROR(("error in VAPI_create_cq: %s", VAPI_strerror(ret)));
return OMPI_ERROR; return OMPI_ERROR;
} }
if(cqe_cnt <= 0) { if(cqe_cnt <= 0) {
BTL_ERROR("error creating completion queue "); BTL_ERROR(("error creating completion queue "));
return OMPI_ERROR; return OMPI_ERROR;
} }
@ -924,7 +923,7 @@ int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t *mvapi_btl)
async_event_handler, 0, &mvapi_btl->async_handler); async_event_handler, 0, &mvapi_btl->async_handler);
if(VAPI_OK != ret) { if(VAPI_OK != ret) {
BTL_ERROR("error in EVAPI_set_async_event_handler: %s", VAPI_strerror(ret)); BTL_ERROR(("error in EVAPI_set_async_event_handler: %s", VAPI_strerror(ret)));
return OMPI_ERROR; return OMPI_ERROR;
} }

Просмотреть файл

@ -240,10 +240,10 @@ struct mca_btl_mvapi_module_t {
post_srr_sub_desc_post, \ post_srr_sub_desc_post, \
&post_srr_sub_rwqe_posted); \ &post_srr_sub_rwqe_posted); \
if(VAPI_OK != post_srr_sub_frag->ret) { \ if(VAPI_OK != post_srr_sub_frag->ret) { \
BTL_ERROR("error posting receive descriptors to shared receive queue: %s",\ BTL_ERROR(("error posting receive descriptors to shared receive queue: %s",\
VAPI_strerror(post_srr_sub_frag->ret)); \ VAPI_strerror(post_srr_sub_frag->ret))); \
} else if(post_srr_sub_rwqe_posted < 1) { \ } else if(post_srr_sub_rwqe_posted < 1) { \
BTL_ERROR("error posting receive descriptors to shared receive queue, number of entries posted is %d", post_srr_sub_rwqe_posted); \ BTL_ERROR(("error posting receive descriptors to shared receive queue, number of entries posted is %d", post_srr_sub_rwqe_posted)); \
} else {\ } else {\
OPAL_THREAD_ADD32(post_srr_sub_srr_posted, post_srr_sub_cnt); \ OPAL_THREAD_ADD32(post_srr_sub_srr_posted, post_srr_sub_cnt); \
}\ }\

Просмотреть файл

@ -255,7 +255,7 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
/* Determine the number of hca's available on the host */ /* Determine the number of hca's available on the host */
vapi_ret=EVAPI_list_hcas(0, &num_hcas, NULL); vapi_ret=EVAPI_list_hcas(0, &num_hcas, NULL);
if( VAPI_EAGAIN != vapi_ret || 0 == num_hcas ) { if( VAPI_EAGAIN != vapi_ret || 0 == num_hcas ) {
BTL_ERROR("No hca's found on this host!"); BTL_ERROR(("No hca's found on this host!"));
return NULL; return NULL;
} }
@ -284,14 +284,14 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
for(i = 0; i < num_hcas; i++){ for(i = 0; i < num_hcas; i++){
vapi_ret = EVAPI_get_hca_hndl(hca_ids[i], &hca_hndl); vapi_ret = EVAPI_get_hca_hndl(hca_ids[i], &hca_hndl);
if(VAPI_OK != vapi_ret) { if(VAPI_OK != vapi_ret) {
BTL_ERROR("error getting hca handle: %s", VAPI_strerror(vapi_ret)); BTL_ERROR(("error getting hca handle: %s", VAPI_strerror(vapi_ret)));
return NULL; return NULL;
} }
vapi_ret = VAPI_query_hca_cap(hca_hndl, &hca_vendor, &hca_cap); vapi_ret = VAPI_query_hca_cap(hca_hndl, &hca_vendor, &hca_cap);
if(VAPI_OK != vapi_ret) { if(VAPI_OK != vapi_ret) {
BTL_ERROR("error getting hca properties %s", VAPI_strerror(vapi_ret)); BTL_ERROR(("error getting hca properties %s", VAPI_strerror(vapi_ret)));
return NULL; return NULL;
} }
@ -300,7 +300,7 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
for(j = 1; j <= hca_cap.phys_port_num; j++){ for(j = 1; j <= hca_cap.phys_port_num; j++){
vapi_ret = VAPI_query_hca_port_prop(hca_hndl, (IB_port_t) j, &hca_port); vapi_ret = VAPI_query_hca_port_prop(hca_hndl, (IB_port_t) j, &hca_port);
if(VAPI_OK != vapi_ret) { if(VAPI_OK != vapi_ret) {
BTL_ERROR("error getting hca port properties %s", VAPI_strerror(vapi_ret)); BTL_ERROR(("error getting hca port properties %s", VAPI_strerror(vapi_ret)));
return NULL; return NULL;
} }
@ -385,7 +385,7 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
&hca_pd); &hca_pd);
if(NULL == mvapi_btl->ib_pool) { if(NULL == mvapi_btl->ib_pool) {
BTL_ERROR("error creating vapi memory pool! aborting mvapi btl initialization"); BTL_ERROR(("error creating vapi memory pool! aborting mvapi btl initialization"));
return NULL; return NULL;
} }
/* Initialize pool of send fragments */ /* Initialize pool of send fragments */
@ -495,16 +495,16 @@ int mca_btl_mvapi_component_progress()
ret = VAPI_poll_cq(mvapi_btl->nic, mvapi_btl->cq_hndl_high, &comp); ret = VAPI_poll_cq(mvapi_btl->nic, mvapi_btl->cq_hndl_high, &comp);
if(VAPI_OK == ret) { if(VAPI_OK == ret) {
if(comp.status != VAPI_SUCCESS) { if(comp.status != VAPI_SUCCESS) {
BTL_ERROR("Got error : %s, Vendor code : %d Frag : %p", BTL_ERROR(("Got error : %s, Vendor code : %d Frag : %p",
VAPI_wc_status_sym(comp.status), VAPI_wc_status_sym(comp.status),
comp.vendor_err_syndrome, comp.id); comp.vendor_err_syndrome, comp.id));
return OMPI_ERROR; return OMPI_ERROR;
} }
/* Handle work completions */ /* Handle work completions */
switch(comp.opcode) { switch(comp.opcode) {
case VAPI_CQE_RQ_RDMA_WITH_IMM: case VAPI_CQE_RQ_RDMA_WITH_IMM:
BTL_ERROR("Got an RDMA with Immediate data!, not supported!"); BTL_ERROR(("Got an RDMA with Immediate data!, not supported!"));
return OMPI_ERROR; return OMPI_ERROR;
case VAPI_CQE_SQ_RDMA_WRITE: case VAPI_CQE_SQ_RDMA_WRITE:
@ -520,7 +520,7 @@ int mca_btl_mvapi_component_progress()
case VAPI_CQE_RQ_SEND_DATA: case VAPI_CQE_RQ_SEND_DATA:
/* Process a RECV */ /* Process a RECV */
BTL_DEBUG_OUT("Got a recv completion"); BTL_DEBUG(("Got a recv completion"));
frag = (mca_btl_mvapi_frag_t*) comp.id; frag = (mca_btl_mvapi_frag_t*) comp.id;
endpoint = (mca_btl_mvapi_endpoint_t*) frag->endpoint; endpoint = (mca_btl_mvapi_endpoint_t*) frag->endpoint;
@ -543,7 +543,7 @@ int mca_btl_mvapi_component_progress()
break; break;
default: default:
BTL_ERROR("Unhandled work completion opcode is %d", comp.opcode); BTL_ERROR(("Unhandled work completion opcode is %d", comp.opcode));
break; break;
} }
} }
@ -553,16 +553,16 @@ int mca_btl_mvapi_component_progress()
ret = VAPI_poll_cq(mvapi_btl->nic, mvapi_btl->cq_hndl_low, &comp); ret = VAPI_poll_cq(mvapi_btl->nic, mvapi_btl->cq_hndl_low, &comp);
if(VAPI_OK == ret) { if(VAPI_OK == ret) {
if(comp.status != VAPI_SUCCESS) { if(comp.status != VAPI_SUCCESS) {
BTL_ERROR("Got error : %s, Vendor code : %d Frag : %p", BTL_ERROR(("Got error : %s, Vendor code : %d Frag : %p",
VAPI_wc_status_sym(comp.status), VAPI_wc_status_sym(comp.status),
comp.vendor_err_syndrome, comp.id); comp.vendor_err_syndrome, comp.id));
return OMPI_ERROR; return OMPI_ERROR;
} }
/* Handle n/w completions */ /* Handle n/w completions */
switch(comp.opcode) { switch(comp.opcode) {
case VAPI_CQE_RQ_RDMA_WITH_IMM: case VAPI_CQE_RQ_RDMA_WITH_IMM:
BTL_ERROR("Got an RDMA with Immediate data!, not supported!"); BTL_ERROR(("Got an RDMA with Immediate data!, not supported!"));
return OMPI_ERROR; return OMPI_ERROR;
case VAPI_CQE_SQ_RDMA_WRITE: case VAPI_CQE_SQ_RDMA_WRITE:
@ -577,7 +577,7 @@ int mca_btl_mvapi_component_progress()
case VAPI_CQE_RQ_SEND_DATA: case VAPI_CQE_RQ_SEND_DATA:
BTL_DEBUG_OUT("Got a recv completion"); BTL_DEBUG(("Got a recv completion"));
frag = (mca_btl_mvapi_frag_t*) comp.id; frag = (mca_btl_mvapi_frag_t*) comp.id;
endpoint = (mca_btl_mvapi_endpoint_t*) frag->endpoint; endpoint = (mca_btl_mvapi_endpoint_t*) frag->endpoint;
frag->rc=OMPI_SUCCESS; frag->rc=OMPI_SUCCESS;
@ -599,7 +599,7 @@ int mca_btl_mvapi_component_progress()
break; break;
default: default:
BTL_ERROR("Errorneous network completion"); BTL_ERROR(("Errorneous network completion"));
break; break;
} }
} }

Просмотреть файл

@ -184,10 +184,10 @@ static int mca_btl_mvapi_endpoint_send_connect_req(mca_btl_base_endpoint_t* endp
mca_btl_mvapi_endpoint_send_cb, NULL); mca_btl_mvapi_endpoint_send_cb, NULL);
BTL_DEBUG_OUT("Sending High Priority QP num = %d, Low Priority QP num = %d, LID = %d", BTL_DEBUG(("Sending High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
endpoint->lcl_qp_prop_high.qp_num, endpoint->lcl_qp_prop_high.qp_num,
endpoint->lcl_qp_prop_low.qp_num, endpoint->lcl_qp_prop_low.qp_num,
endpoint->endpoint_btl->port.lid); endpoint->endpoint_btl->port.lid));
if(rc < 0) { if(rc < 0) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
@ -260,10 +260,10 @@ static int mca_btl_mvapi_endpoint_set_remote_info(mca_btl_base_endpoint_t* endpo
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
BTL_DEBUG_OUT("Received High Priority QP num = %d, Low Priority QP num %d, LID = %d", BTL_DEBUG(("Received High Priority QP num = %d, Low Priority QP num %d, LID = %d",
endpoint->rem_qp_num_high, endpoint->rem_qp_num_high,
endpoint->rem_qp_num_low, endpoint->rem_qp_num_low,
endpoint->rem_lid); endpoint->rem_lid));
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -293,7 +293,7 @@ static int mca_btl_mvapi_endpoint_start_connect(mca_btl_base_endpoint_t* endpoin
&endpoint->lcl_qp_hndl_high, &endpoint->lcl_qp_hndl_high,
&endpoint->lcl_qp_prop_high, &endpoint->lcl_qp_prop_high,
VAPI_TS_RC))) { VAPI_TS_RC))) {
BTL_ERROR("error creating queue pair, error code %d", rc); BTL_ERROR(("error creating queue pair, error code %d", rc));
return rc; return rc;
} }
@ -308,19 +308,19 @@ static int mca_btl_mvapi_endpoint_start_connect(mca_btl_base_endpoint_t* endpoin
&endpoint->lcl_qp_prop_low, &endpoint->lcl_qp_prop_low,
VAPI_TS_RC))) { VAPI_TS_RC))) {
BTL_ERROR("error creating queue pair, error code %d", rc); BTL_ERROR(("error creating queue pair, error code %d", rc));
return rc; return rc;
} }
BTL_DEBUG_OUT("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d", BTL_DEBUG(("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
endpoint->lcl_qp_prop_high.qp_num, endpoint->lcl_qp_prop_high.qp_num,
endpoint->lcl_qp_prop_low.qp_num, endpoint->lcl_qp_prop_low.qp_num,
endpoint->endpoint_btl->port.lid); endpoint->endpoint_btl->port.lid));
/* Send connection info over to remote endpoint */ /* Send connection info over to remote endpoint */
endpoint->endpoint_state = MCA_BTL_IB_CONNECTING; endpoint->endpoint_state = MCA_BTL_IB_CONNECTING;
if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_send_connect_req(endpoint))) { if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_send_connect_req(endpoint))) {
BTL_ERROR("error sending connect request, error code %d", rc); BTL_ERROR(("error sending connect request, error code %d", rc));
return rc; return rc;
} }
return OMPI_SUCCESS; return OMPI_SUCCESS;
@ -344,7 +344,7 @@ static int mca_btl_mvapi_endpoint_reply_start_connect(mca_btl_mvapi_endpoint_t *
&endpoint->lcl_qp_hndl_high, &endpoint->lcl_qp_hndl_high,
&endpoint->lcl_qp_prop_high, &endpoint->lcl_qp_prop_high,
VAPI_TS_RC))) { VAPI_TS_RC))) {
BTL_ERROR("error creating queue pair, error code %d", rc); BTL_ERROR(("error creating queue pair, error code %d", rc));
return rc; return rc;
} }
@ -358,14 +358,14 @@ static int mca_btl_mvapi_endpoint_reply_start_connect(mca_btl_mvapi_endpoint_t *
&endpoint->lcl_qp_hndl_low, &endpoint->lcl_qp_hndl_low,
&endpoint->lcl_qp_prop_low, &endpoint->lcl_qp_prop_low,
VAPI_TS_RC))) { VAPI_TS_RC))) {
BTL_ERROR("error creating queue pair, error code %d", rc); BTL_ERROR(("error creating queue pair, error code %d", rc));
return rc; return rc;
} }
BTL_DEBUG_OUT("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d", BTL_DEBUG(("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
endpoint->lcl_qp_prop_high.qp_num, endpoint->lcl_qp_prop_high.qp_num,
endpoint->lcl_qp_prop_low.qp_num, endpoint->lcl_qp_prop_low.qp_num,
endpoint->endpoint_btl->port.lid); endpoint->endpoint_btl->port.lid));
@ -377,13 +377,13 @@ static int mca_btl_mvapi_endpoint_reply_start_connect(mca_btl_mvapi_endpoint_t *
rc = mca_btl_mvapi_endpoint_connect(endpoint); rc = mca_btl_mvapi_endpoint_connect(endpoint);
if(rc != OMPI_SUCCESS) { if(rc != OMPI_SUCCESS) {
BTL_ERROR("error in endpoint connect error code is %d", rc); BTL_ERROR(("error in endpoint connect error code is %d", rc));
return rc; return rc;
} }
/* Send connection info over to remote endpoint */ /* Send connection info over to remote endpoint */
if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_send_connect_req(endpoint))) { if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_send_connect_req(endpoint))) {
BTL_ERROR("error in endpoint send connect request error code is %d", rc); BTL_ERROR(("error in endpoint send connect request error code is %d", rc));
return rc; return rc;
} }
return OMPI_SUCCESS; return OMPI_SUCCESS;
@ -446,7 +446,7 @@ static void mca_btl_mvapi_endpoint_recv(
* and then reply with our QP information */ * and then reply with our QP information */
if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_reply_start_connect(ib_endpoint, buffer))) { if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_reply_start_connect(ib_endpoint, buffer))) {
BTL_ERROR("error in endpoint reply start connect"); BTL_ERROR(("error in endpoint reply start connect"));
break; break;
} }
@ -458,7 +458,7 @@ static void mca_btl_mvapi_endpoint_recv(
mca_btl_mvapi_endpoint_set_remote_info(ib_endpoint, buffer); mca_btl_mvapi_endpoint_set_remote_info(ib_endpoint, buffer);
if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_connect(ib_endpoint))) { if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_connect(ib_endpoint))) {
BTL_ERROR("endpoint connect error: %d", rc); BTL_ERROR(("endpoint connect error: %d", rc));
break; break;
} }
@ -478,7 +478,7 @@ static void mca_btl_mvapi_endpoint_recv(
case MCA_BTL_IB_CONNECTED : case MCA_BTL_IB_CONNECTED :
break; break;
default : default :
BTL_ERROR("Invalid endpoint state %d", endpoint_state); BTL_ERROR(("Invalid endpoint state %d", endpoint_state));
} }
break; break;
@ -519,7 +519,7 @@ int mca_btl_mvapi_endpoint_send(
switch(endpoint->endpoint_state) { switch(endpoint->endpoint_state) {
case MCA_BTL_IB_CONNECTING: case MCA_BTL_IB_CONNECTING:
BTL_DEBUG_OUT("Queing because state is connecting"); BTL_DEBUG(("Queing because state is connecting"));
opal_list_append(&endpoint->pending_send_frags, opal_list_append(&endpoint->pending_send_frags,
(opal_list_item_t *)frag); (opal_list_item_t *)frag);
@ -529,7 +529,7 @@ int mca_btl_mvapi_endpoint_send(
case MCA_BTL_IB_CONNECT_ACK: case MCA_BTL_IB_CONNECT_ACK:
BTL_DEBUG_OUT("Queuing because waiting for ack"); BTL_DEBUG(("Queuing because waiting for ack"));
opal_list_append(&endpoint->pending_send_frags, opal_list_append(&endpoint->pending_send_frags,
(opal_list_item_t *)frag); (opal_list_item_t *)frag);
@ -539,7 +539,7 @@ int mca_btl_mvapi_endpoint_send(
case MCA_BTL_IB_CLOSED: case MCA_BTL_IB_CLOSED:
BTL_DEBUG_OUT("Connection to endpoint closed ... connecting ..."); BTL_DEBUG(("Connection to endpoint closed ... connecting ..."));
opal_list_append(&endpoint->pending_send_frags, opal_list_append(&endpoint->pending_send_frags,
(opal_list_item_t *)frag); (opal_list_item_t *)frag);
@ -558,10 +558,10 @@ int mca_btl_mvapi_endpoint_send(
mvapi_btl = endpoint->endpoint_btl; mvapi_btl = endpoint->endpoint_btl;
BTL_DEBUG_OUT("Send to : %d, len : %d, frag : %p", BTL_DEBUG(("Send to : %d, len : %d, frag : %p",
endpoint->endpoint_proc->proc_guid.vpid, endpoint->endpoint_proc->proc_guid.vpid,
frag->sg_entry.len, frag->sg_entry.len,
frag); frag));
rc = mca_btl_mvapi_endpoint_post_send(mvapi_btl, endpoint, frag); rc = mca_btl_mvapi_endpoint_post_send(mvapi_btl, endpoint, frag);
@ -598,7 +598,7 @@ void mca_btl_mvapi_progress_send_frags(mca_btl_mvapi_endpoint_t* endpoint)
/* We need to post this one */ /* We need to post this one */
if(OMPI_SUCCESS != mca_btl_mvapi_endpoint_post_send(mvapi_btl, endpoint, frag)) if(OMPI_SUCCESS != mca_btl_mvapi_endpoint_post_send(mvapi_btl, endpoint, frag))
BTL_ERROR("error in mca_btl_mvapi_endpoint_send"); BTL_ERROR(("error in mca_btl_mvapi_endpoint_send"));
} }
} }
@ -703,7 +703,7 @@ int mca_btl_mvapi_endpoint_create_qp(
qp_prop); qp_prop);
if(VAPI_OK != ret) { if(VAPI_OK != ret) {
BTL_ERROR("error creating the queue pair: %s", VAPI_strerror(ret)); BTL_ERROR(("error creating the queue pair: %s", VAPI_strerror(ret)));
return OMPI_ERROR; return OMPI_ERROR;
} }
return OMPI_SUCCESS; return OMPI_SUCCESS;
@ -749,11 +749,11 @@ int mca_btl_mvapi_endpoint_qp_init_query(
&qp_attr, &qp_attr_mask, &qp_cap); &qp_attr, &qp_attr_mask, &qp_cap);
if(VAPI_OK != ret) { if(VAPI_OK != ret) {
BTL_ERROR("Error modifying the queue pair: %s", VAPI_strerror(ret)); BTL_ERROR(("Error modifying the queue pair: %s", VAPI_strerror(ret)));
return OMPI_ERROR; return OMPI_ERROR;
} }
BTL_DEBUG_OUT("Modified to init..Qp %d", qp_hndl); BTL_DEBUG(("Modified to init..Qp %d", qp_hndl));
/********************** INIT --> RTR ************************/ /********************** INIT --> RTR ************************/
QP_ATTR_MASK_CLR_ALL(qp_attr_mask); QP_ATTR_MASK_CLR_ALL(qp_attr_mask);
@ -784,11 +784,11 @@ int mca_btl_mvapi_endpoint_qp_init_query(
&qp_attr, &qp_attr_mask, &qp_cap); &qp_attr, &qp_attr_mask, &qp_cap);
if(VAPI_OK != ret) { if(VAPI_OK != ret) {
BTL_ERROR("Error modifying the queue pair: %s", VAPI_strerror(ret)); BTL_ERROR(("Error modifying the queue pair: %s", VAPI_strerror(ret)));
return OMPI_ERROR; return OMPI_ERROR;
} }
BTL_DEBUG_OUT("Modified to RTR..Qp %d", qp_hndl); BTL_DEBUG(("Modified to RTR..Qp %d", qp_hndl));
/************** RTS *******************/ /************** RTS *******************/
QP_ATTR_MASK_CLR_ALL(qp_attr_mask); QP_ATTR_MASK_CLR_ALL(qp_attr_mask);
@ -811,11 +811,11 @@ int mca_btl_mvapi_endpoint_qp_init_query(
if(VAPI_OK != ret) { if(VAPI_OK != ret) {
return OMPI_ERROR; return OMPI_ERROR;
} }
BTL_DEBUG_OUT("Modified to RTS..Qp %d", qp_hndl); BTL_DEBUG(("Modified to RTS..Qp %d", qp_hndl));
ret = VAPI_query_qp(nic, qp_hndl, &qp_attr, &qp_attr_mask, &qp_init_attr ); ret = VAPI_query_qp(nic, qp_hndl, &qp_attr, &qp_attr_mask, &qp_init_attr );
if (ret != VAPI_OK) { if (ret != VAPI_OK) {
BTL_ERROR("Error modifying the queue pair: %s", VAPI_strerror(ret)); BTL_ERROR(("Error modifying the queue pair: %s", VAPI_strerror(ret)));
return OMPI_ERROR; return OMPI_ERROR;
} }

Просмотреть файл

@ -189,8 +189,8 @@ void mca_btl_mvapi_progress_send_frags(mca_btl_mvapi_endpoint_t*);
post_rr_sub_cnt, \ post_rr_sub_cnt, \
post_rr_sub_desc_post); \ post_rr_sub_desc_post); \
if(VAPI_OK != post_rr_sub_frag->ret) { \ if(VAPI_OK != post_rr_sub_frag->ret) { \
BTL_ERROR("error posting receive descriptors: %s",\ BTL_ERROR(("error posting receive descriptors: %s",\
VAPI_strerror(post_rr_sub_frag->ret)); \ VAPI_strerror(post_rr_sub_frag->ret))); \
} else {\ } else {\
OPAL_THREAD_ADD32(post_rr_sub_rr_posted, post_rr_sub_cnt); \ OPAL_THREAD_ADD32(post_rr_sub_rr_posted, post_rr_sub_cnt); \
}\ }\
@ -227,7 +227,7 @@ void mca_btl_mvapi_progress_send_frags(mca_btl_mvapi_endpoint_t*);
/* cnt, */ /* cnt, */
/* rr_desc_post); */ /* rr_desc_post); */
/* if(VAPI_OK != frag->ret) { */ /* if(VAPI_OK != frag->ret) { */
/* BTL_ERROR("error posting receive descriptors: %s", VAPI_strerror(frag->ret)); */ /* BTL_ERROR(("error posting receive descriptors: %s", VAPI_strerror(frag->ret))); */
/* return OMPI_ERROR; */ /* return OMPI_ERROR; */
/* } */ /* } */
/* OPAL_THREAD_ADD32(rr_posted, cnt); */ /* OPAL_THREAD_ADD32(rr_posted, cnt); */

Просмотреть файл

@ -131,8 +131,7 @@ int mca_btl_openib_del_procs(struct mca_btl_base_module_t* btl,
struct ompi_proc_t **procs, struct ompi_proc_t **procs,
struct mca_btl_base_endpoint_t ** peers) struct mca_btl_base_endpoint_t ** peers)
{ {
/* TODO */ BTL_DEBUG(("TODO\n"));
BTL_DEBUG_OUT("Stub\n");
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -215,7 +214,7 @@ int mca_btl_openib_free(
} else if(frag->size == mca_btl_openib_component.eager_limit){ } else if(frag->size == mca_btl_openib_component.eager_limit){
MCA_BTL_IB_FRAG_RETURN_EAGER(btl, frag); MCA_BTL_IB_FRAG_RETURN_EAGER(btl, frag);
} else { } else {
BTL_ERROR("invalid descriptor"); BTL_ERROR(("invalid descriptor"));
} }
return OMPI_SUCCESS; return OMPI_SUCCESS;
@ -299,13 +298,13 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
rc = mca_mpool_base_remove((void*) openib_reg->base_reg.base); rc = mca_mpool_base_remove((void*) openib_reg->base_reg.base);
if(OMPI_SUCCESS != rc) { if(OMPI_SUCCESS != rc) {
BTL_ERROR("error removing memory region from memory pool tree"); BTL_ERROR(("error removing memory region from memory pool tree"));
return NULL; return NULL;
} }
if(is_leave_pinned) { if(is_leave_pinned) {
if(NULL == opal_list_remove_item(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg)){ if(NULL == opal_list_remove_item(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg)){
BTL_ERROR("error removing item from reg_mru_list"); BTL_ERROR(("error removing item from reg_mru_list"));
return NULL; return NULL;
} }
} }
@ -324,7 +323,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
if(rc != OMPI_SUCCESS) { if(rc != OMPI_SUCCESS) {
BTL_ERROR("error inserting memory region into memory pool tree"); BTL_ERROR(("error inserting memory region into memory pool tree"));
return NULL; return NULL;
} }
@ -338,7 +337,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
} else if(is_leave_pinned) { } else if(is_leave_pinned) {
/* the current memory region is large enough and we should leave the memory pinned */ /* the current memory region is large enough and we should leave the memory pinned */
if(NULL == opal_list_remove_item(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg)) { if(NULL == opal_list_remove_item(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg)) {
BTL_ERROR("error removing item from reg_mru_list"); BTL_ERROR(("error removing item from reg_mru_list"));
return NULL; return NULL;
} }
opal_list_append(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg); opal_list_append(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg);
@ -398,7 +397,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
opal_list_remove_last(&openib_btl->reg_mru_list); opal_list_remove_last(&openib_btl->reg_mru_list);
if( NULL == old_reg) { if( NULL == old_reg) {
BTL_ERROR("error removing item from reg_mru_list"); BTL_ERROR(("error removing item from reg_mru_list"));
return NULL; return NULL;
} }
@ -406,7 +405,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
rc = mca_mpool_base_remove((void*) old_reg->base_reg.base); rc = mca_mpool_base_remove((void*) old_reg->base_reg.base);
if(OMPI_SUCCESS != rc) { if(OMPI_SUCCESS != rc) {
BTL_ERROR("error removing memory region from memory pool tree"); BTL_ERROR(("error removing memory region from memory pool tree"));
return NULL; return NULL;
} }
@ -452,8 +451,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
frag->base.des_dst = NULL; frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0; frag->base.des_dst_cnt = 0;
frag->openib_reg = openib_reg; frag->openib_reg = openib_reg;
BTL_DEBUG_OUT("frag->sg_entry.lkey = %lu .addr = %llu", frag->sg_entry.lkey, frag->sg_entry.addr); BTL_DEBUG(("frag->sg_entry.lkey = %lu .addr = %llu", frag->sg_entry.lkey, frag->sg_entry.addr));
return &frag->base; return &frag->base;
@ -580,7 +578,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
rc = mca_mpool_base_remove((void*) openib_reg->base_reg.base); rc = mca_mpool_base_remove((void*) openib_reg->base_reg.base);
if(OMPI_SUCCESS != rc) { if(OMPI_SUCCESS != rc) {
BTL_ERROR("error removing memory region from memory pool tree"); BTL_ERROR(("error removing memory region from memory pool tree"));
return NULL; return NULL;
} }
@ -589,7 +587,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
* pull it off the MRU list * pull it off the MRU list
*/ */
if(NULL == opal_list_remove_item(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg)) { if(NULL == opal_list_remove_item(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg)) {
BTL_ERROR("error removing item from reg_mru_list"); BTL_ERROR(("error removing item from reg_mru_list"));
return NULL; return NULL;
} }
} }
@ -608,7 +606,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
(mca_mpool_base_registration_t*) openib_reg); (mca_mpool_base_registration_t*) openib_reg);
if(OMPI_SUCCESS != rc) { if(OMPI_SUCCESS != rc) {
BTL_ERROR("error inserting memory region into memory pool tree"); BTL_ERROR(("error inserting memory region into memory pool tree"));
return NULL; return NULL;
} }
OBJ_RETAIN(openib_reg); OBJ_RETAIN(openib_reg);
@ -623,7 +621,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
else if(is_leave_pinned){ else if(is_leave_pinned){
/* the current memory region is large enough and we should leave the memory pinned */ /* the current memory region is large enough and we should leave the memory pinned */
if(NULL == opal_list_remove_item(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg)) { if(NULL == opal_list_remove_item(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg)) {
BTL_ERROR("error removing item from reg_mru_list"); BTL_ERROR(("error removing item from reg_mru_list"));
return NULL; return NULL;
} }
opal_list_append(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg); opal_list_append(&openib_btl->reg_mru_list, (opal_list_item_t*) openib_reg);
@ -647,13 +645,13 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
opal_list_remove_last(&openib_btl->reg_mru_list); opal_list_remove_last(&openib_btl->reg_mru_list);
if( NULL == old_reg) { if( NULL == old_reg) {
BTL_ERROR("error removing item from reg_mru_list"); BTL_ERROR(("error removing item from reg_mru_list"));
return NULL; return NULL;
} }
rc = mca_mpool_base_remove((void*) old_reg->base_reg.base); rc = mca_mpool_base_remove((void*) old_reg->base_reg.base);
if(OMPI_SUCCESS !=rc ) { if(OMPI_SUCCESS !=rc ) {
BTL_ERROR("error removing memory region from memory pool tree"); BTL_ERROR(("error removing memory region from memory pool tree"));
return NULL; return NULL;
} }
@ -673,7 +671,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
(void*) (&openib_btl->super), (void*) (&openib_btl->super),
(mca_mpool_base_registration_t*) openib_reg); (mca_mpool_base_registration_t*) openib_reg);
if(OMPI_SUCCESS != rc){ if(OMPI_SUCCESS != rc){
BTL_ERROR("error inserting memory region into memory pool"); BTL_ERROR(("error inserting memory region into memory pool"));
return NULL; return NULL;
} }
@ -705,7 +703,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
frag->base.des_src = NULL; frag->base.des_src = NULL;
frag->base.des_src_cnt = 0; frag->base.des_src_cnt = 0;
frag->openib_reg = openib_reg; frag->openib_reg = openib_reg;
BTL_DEBUG_OUT("frag->sg_entry.lkey = %lu .addr = %llu frag->segment.seg_key.key32[0] = %lu" , frag->sg_entry.lkey, frag->sg_entry.addr, frag->segment.seg_key.key32[0]); BTL_DEBUG(("frag->sg_entry.lkey = %lu .addr = %llu frag->segment.seg_key.key32[0] = %lu" , frag->sg_entry.lkey, frag->sg_entry.addr, frag->segment.seg_key.key32[0]));
return &frag->base; return &frag->base;
@ -794,16 +792,16 @@ int mca_btl_openib_put( mca_btl_base_module_t* btl,
frag->sg_entry.addr = (uintptr_t) frag->base.des_src->seg_addr.pval; frag->sg_entry.addr = (uintptr_t) frag->base.des_src->seg_addr.pval;
frag->sg_entry.length = frag->base.des_src->seg_len; frag->sg_entry.length = frag->base.des_src->seg_len;
BTL_DEBUG_OUT("frag->wr_desc.sr_desc.wr.rdma.remote_addr = %llu .rkey = %lu frag->sg_entry.addr = %llu .length = %lu" BTL_DEBUG(("frag->wr_desc.sr_desc.wr.rdma.remote_addr = %llu .rkey = %lu frag->sg_entry.addr = %llu .length = %lu"
, frag->wr_desc.sr_desc.wr.rdma.remote_addr , frag->wr_desc.sr_desc.wr.rdma.remote_addr
, frag->wr_desc.sr_desc.wr.rdma.rkey , frag->wr_desc.sr_desc.wr.rdma.rkey
, frag->sg_entry.addr , frag->sg_entry.addr
, frag->sg_entry.length); , frag->sg_entry.length));
if(ibv_post_send(endpoint->lcl_qp_low, if(ibv_post_send(endpoint->lcl_qp_low,
&frag->wr_desc.sr_desc, &frag->wr_desc.sr_desc,
&bad_wr)){ &bad_wr)){
BTL_ERROR("error posting send request errno says %s", strerror(errno)); BTL_ERROR(("error posting send request errno says %s", strerror(errno)));
return OMPI_ERROR; return OMPI_ERROR;
} }
@ -830,9 +828,9 @@ int mca_btl_openib_module_init(mca_btl_openib_module_t *openib_btl)
if(NULL == openib_btl->ib_pd) { if(NULL == openib_btl->ib_pd) {
BTL_ERROR("error allocating pd for %s errno says %s\n", BTL_ERROR(("error allocating pd for %s errno says %s\n",
ibv_get_device_name(openib_btl->ib_dev), ibv_get_device_name(openib_btl->ib_dev),
strerror(errno)); strerror(errno)));
return OMPI_ERROR; return OMPI_ERROR;
} }
@ -840,18 +838,18 @@ int mca_btl_openib_module_init(mca_btl_openib_module_t *openib_btl)
openib_btl->ib_cq_low = ibv_create_cq(ctx, mca_btl_openib_component.ib_cq_size, NULL); openib_btl->ib_cq_low = ibv_create_cq(ctx, mca_btl_openib_component.ib_cq_size, NULL);
if(NULL == openib_btl->ib_cq_low) { if(NULL == openib_btl->ib_cq_low) {
BTL_ERROR("error creating low priority cq for %s errno says %s\n", BTL_ERROR(("error creating low priority cq for %s errno says %s\n",
ibv_get_device_name(openib_btl->ib_dev), ibv_get_device_name(openib_btl->ib_dev),
strerror(errno)); strerror(errno)));
return OMPI_ERROR; return OMPI_ERROR;
} }
openib_btl->ib_cq_high = ibv_create_cq(ctx, mca_btl_openib_component.ib_cq_size, NULL); openib_btl->ib_cq_high = ibv_create_cq(ctx, mca_btl_openib_component.ib_cq_size, NULL);
if(NULL == openib_btl->ib_cq_high) { if(NULL == openib_btl->ib_cq_high) {
BTL_ERROR("error creating high priority cq for %s errno says %s\n", BTL_ERROR(("error creating high priority cq for %s errno says %s\n",
ibv_get_device_name(openib_btl->ib_dev), ibv_get_device_name(openib_btl->ib_dev),
strerror(errno)); strerror(errno)));
return OMPI_ERROR; return OMPI_ERROR;
} }

Просмотреть файл

@ -265,7 +265,7 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
num_devs++; num_devs++;
if(0 == num_devs) { if(0 == num_devs) {
BTL_ERROR("No hca's found on this host!"); BTL_ERROR(("No hca's found on this host!"));
return NULL; return NULL;
} }
@ -300,12 +300,12 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
ib_dev_context = ibv_open_device(ib_dev); ib_dev_context = ibv_open_device(ib_dev);
if(!ib_dev_context) { if(!ib_dev_context) {
BTL_ERROR(" error obtaining device context for %s errno says %s\n", ibv_get_device_name(ib_dev), strerror(errno)); BTL_ERROR((" error obtaining device context for %s errno says %s\n", ibv_get_device_name(ib_dev), strerror(errno)));
return NULL; return NULL;
} }
if(ibv_query_device(ib_dev_context, &ib_dev_attr)){ if(ibv_query_device(ib_dev_context, &ib_dev_attr)){
BTL_ERROR("error obtaining device attributes for %s errno says %s\n", ibv_get_device_name(ib_dev), strerror(errno)); BTL_ERROR(("error obtaining device attributes for %s errno says %s\n", ibv_get_device_name(ib_dev), strerror(errno)));
return NULL; return NULL;
} }
@ -316,8 +316,8 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
struct ibv_port_attr* ib_port_attr; struct ibv_port_attr* ib_port_attr;
ib_port_attr = (struct ibv_port_attr*) malloc(sizeof(struct ibv_port_attr)); ib_port_attr = (struct ibv_port_attr*) malloc(sizeof(struct ibv_port_attr));
if(ibv_query_port(ib_dev_context, (uint8_t) j, ib_port_attr)){ if(ibv_query_port(ib_dev_context, (uint8_t) j, ib_port_attr)){
BTL_ERROR("error getting port attributes for device %s port number %d errno says %s", BTL_ERROR(("error getting port attributes for device %s port number %d errno says %s",
ibv_get_device_name(ib_dev), j, strerror(errno)); ibv_get_device_name(ib_dev), j, strerror(errno)));
return NULL; return NULL;
} }
@ -402,7 +402,7 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
&mpool_resources); &mpool_resources);
if(NULL == openib_btl->ib_pool) { if(NULL == openib_btl->ib_pool) {
BTL_ERROR("error creating vapi memory pool! aborting openib btl initialization"); BTL_ERROR(("error creating vapi memory pool! aborting openib btl initialization"));
return NULL; return NULL;
} }
@ -510,27 +510,27 @@ int mca_btl_openib_component_progress()
do{ do{
ne=ibv_poll_cq(openib_btl->ib_cq_high, 1, &wc ); ne=ibv_poll_cq(openib_btl->ib_cq_high, 1, &wc );
if(ne < 0 ){ if(ne < 0 ){
BTL_ERROR("error polling CQ with %d errno says %s\n", ne, strerror(errno)); BTL_ERROR(("error polling CQ with %d errno says %s\n", ne, strerror(errno)));
return OMPI_ERROR; return OMPI_ERROR;
} }
else if(wc.status != IBV_WC_SUCCESS) { else if(wc.status != IBV_WC_SUCCESS) {
BTL_ERROR("error polling CQ with status %d for wr_id %llu\n", BTL_ERROR(("error polling CQ with status %d for wr_id %llu\n",
wc.status, wc.wr_id); wc.status, wc.wr_id));
return OMPI_ERROR; return OMPI_ERROR;
} }
else if(1 == ne) { else if(1 == ne) {
BTL_DEBUG_OUT("completion queue event says opcode is %d\n", wc.opcode); BTL_DEBUG(("completion queue event says opcode is %d\n", wc.opcode));
/* Handle work completions */ /* Handle work completions */
switch(wc.opcode) { switch(wc.opcode) {
case IBV_WC_RECV_RDMA_WITH_IMM: case IBV_WC_RECV_RDMA_WITH_IMM:
BTL_ERROR("Got an RDMA with Immediate data Not supported!"); BTL_ERROR(("Got an RDMA with Immediate data Not supported!"));
return OMPI_ERROR; return OMPI_ERROR;
case IBV_WC_RECV: case IBV_WC_RECV:
/* Process a RECV */ /* Process a RECV */
BTL_DEBUG_OUT("Got an recv on the completion queue"); BTL_DEBUG(("Got an recv on the completion queue"));
frag = (mca_btl_openib_frag_t*) wc.wr_id; frag = (mca_btl_openib_frag_t*) wc.wr_id;
endpoint = (mca_btl_openib_endpoint_t*) frag->endpoint; endpoint = (mca_btl_openib_endpoint_t*) frag->endpoint;
frag->rc=OMPI_SUCCESS; frag->rc=OMPI_SUCCESS;
@ -566,7 +566,7 @@ int mca_btl_openib_component_progress()
break; break;
default: default:
BTL_ERROR("Unhandled work completion opcode is %d", wc.opcode); BTL_ERROR(("Unhandled work completion opcode is %d", wc.opcode));
break; break;
} }
} }
@ -575,24 +575,24 @@ int mca_btl_openib_component_progress()
ne=ibv_poll_cq(openib_btl->ib_cq_low, 1, &wc ); ne=ibv_poll_cq(openib_btl->ib_cq_low, 1, &wc );
if(ne < 0){ if(ne < 0){
BTL_ERROR("error polling CQ with %d errno says %s", ne, strerror(errno)); BTL_ERROR(("error polling CQ with %d errno says %s", ne, strerror(errno)));
return OMPI_ERROR; return OMPI_ERROR;
} }
else if(wc.status != IBV_WC_SUCCESS) { else if(wc.status != IBV_WC_SUCCESS) {
BTL_ERROR("error polling CQ with status %d for wr_id %llu", BTL_ERROR(("error polling CQ with status %d for wr_id %llu",
wc.status, wc.wr_id); wc.status, wc.wr_id));
return OMPI_ERROR; return OMPI_ERROR;
} }
else if(1 == ne) { else if(1 == ne) {
/* Handle n/w completions */ /* Handle n/w completions */
switch(wc.opcode) { switch(wc.opcode) {
case IBV_WC_RECV_RDMA_WITH_IMM: case IBV_WC_RECV_RDMA_WITH_IMM:
BTL_ERROR("Got an RDMA with Immediate data Not supported!"); BTL_ERROR(("Got an RDMA with Immediate data Not supported!"));
return OMPI_ERROR; return OMPI_ERROR;
case IBV_WC_RECV: case IBV_WC_RECV:
/* process a recv completion (this should only occur for a send not an rdma) */ /* process a recv completion (this should only occur for a send not an rdma) */
BTL_DEBUG_OUT( "Got a recv completion"); BTL_DEBUG(( "Got a recv completion"));
frag = (mca_btl_openib_frag_t*) wc.wr_id; frag = (mca_btl_openib_frag_t*) wc.wr_id;
endpoint = (mca_btl_openib_endpoint_t*) frag->endpoint; endpoint = (mca_btl_openib_endpoint_t*) frag->endpoint;
frag->rc=OMPI_SUCCESS; frag->rc=OMPI_SUCCESS;
@ -624,7 +624,7 @@ int mca_btl_openib_component_progress()
break; break;
default: default:
BTL_ERROR("Unhandled work completion opcode is %d", wc.opcode); BTL_ERROR(("Unhandled work completion opcode is %d", wc.opcode));
break; break;
} }
} }

Просмотреть файл

@ -93,7 +93,7 @@ static inline int mca_btl_openib_endpoint_post_send(mca_btl_openib_module_t* ope
if(ibv_post_send(ib_qp, if(ibv_post_send(ib_qp,
&frag->wr_desc.sr_desc, &frag->wr_desc.sr_desc,
&bad_wr)) { &bad_wr)) {
BTL_ERROR("error posting send request errno says %s", strerror(errno)); BTL_ERROR(("error posting send request errno says %s", strerror(errno)));
return OMPI_ERROR; return OMPI_ERROR;
} }
MCA_BTL_OPENIB_ENDPOINT_POST_RR_HIGH(endpoint, 1); MCA_BTL_OPENIB_ENDPOINT_POST_RR_HIGH(endpoint, 1);
@ -206,10 +206,10 @@ static int mca_btl_openib_endpoint_send_connect_req(mca_btl_base_endpoint_t* end
mca_btl_openib_endpoint_send_cb, NULL); mca_btl_openib_endpoint_send_cb, NULL);
BTL_DEBUG_OUT("Sending High Priority QP num = %d, Low Priority QP num = %d, LID = %d", BTL_DEBUG(("Sending High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
endpoint->lcl_qp_high->qp_num, endpoint->lcl_qp_high->qp_num,
endpoint->lcl_qp_low->qp_num, endpoint->lcl_qp_low->qp_num,
endpoint->endpoint_btl->ib_port_attr->lid); endpoint->endpoint_btl->ib_port_attr->lid));
if(rc < 0) { if(rc < 0) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
@ -297,10 +297,10 @@ static int mca_btl_openib_endpoint_set_remote_info(mca_btl_base_endpoint_t* endp
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
BTL_DEBUG_OUT("Received High Priority QP num = %d, Low Priority QP num %d, LID = %d", BTL_DEBUG(("Received High Priority QP num = %d, Low Priority QP num %d, LID = %d",
endpoint->rem_qp_num_high, endpoint->rem_qp_num_high,
endpoint->rem_qp_num_low, endpoint->rem_qp_num_low,
endpoint->rem_lid); endpoint->rem_lid));
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -328,7 +328,7 @@ static int mca_btl_openib_endpoint_start_connect(mca_btl_base_endpoint_t* endpoi
openib_btl->ib_cq_high, openib_btl->ib_cq_high,
endpoint->lcl_qp_attr_high, endpoint->lcl_qp_attr_high,
&endpoint->lcl_qp_high))) { &endpoint->lcl_qp_high))) {
BTL_ERROR("error creating queue pair, error code %d", rc); BTL_ERROR(("error creating queue pair, error code %d", rc));
return rc; return rc;
} }
srand48(getpid() * time(NULL)); srand48(getpid() * time(NULL));
@ -340,20 +340,20 @@ static int mca_btl_openib_endpoint_start_connect(mca_btl_base_endpoint_t* endpoi
openib_btl->ib_cq_low, openib_btl->ib_cq_low,
endpoint->lcl_qp_attr_low, endpoint->lcl_qp_attr_low,
&endpoint->lcl_qp_low))) { &endpoint->lcl_qp_low))) {
BTL_ERROR("error creating queue pair, error code %d", rc); BTL_ERROR(("error creating queue pair, error code %d", rc));
return rc; return rc;
} }
endpoint->lcl_psn_low = lrand48() & 0xffffff; endpoint->lcl_psn_low = lrand48() & 0xffffff;
BTL_DEBUG_OUT("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d", BTL_DEBUG(("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
endpoint->lcl_qp_high->qp_num, endpoint->lcl_qp_high->qp_num,
endpoint->lcl_qp_low->qp_num, endpoint->lcl_qp_low->qp_num,
openib_btl->ib_port_attr->lid); openib_btl->ib_port_attr->lid));
/* Send connection info over to remote endpoint */ /* Send connection info over to remote endpoint */
endpoint->endpoint_state = MCA_BTL_IB_CONNECTING; endpoint->endpoint_state = MCA_BTL_IB_CONNECTING;
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_send_connect_req(endpoint))) { if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_send_connect_req(endpoint))) {
BTL_ERROR("error sending connect request, error code %d", rc); BTL_ERROR(("error sending connect request, error code %d", rc));
return rc; return rc;
} }
return OMPI_SUCCESS; return OMPI_SUCCESS;
@ -375,7 +375,7 @@ static int mca_btl_openib_endpoint_reply_start_connect(mca_btl_openib_endpoint_t
openib_btl->ib_cq_high, openib_btl->ib_cq_high,
endpoint->lcl_qp_attr_high, endpoint->lcl_qp_attr_high,
&endpoint->lcl_qp_high))) { &endpoint->lcl_qp_high))) {
BTL_ERROR("error creating queue pair, error code %d", rc); BTL_ERROR(("error creating queue pair, error code %d", rc));
return rc; return rc;
} }
srand48(getpid() * time(NULL)); srand48(getpid() * time(NULL));
@ -387,15 +387,15 @@ static int mca_btl_openib_endpoint_reply_start_connect(mca_btl_openib_endpoint_t
openib_btl->ib_cq_low, openib_btl->ib_cq_low,
endpoint->lcl_qp_attr_low, endpoint->lcl_qp_attr_low,
&endpoint->lcl_qp_low))) { &endpoint->lcl_qp_low))) {
BTL_ERROR("error creating queue pair, error code %d", rc); BTL_ERROR(("error creating queue pair, error code %d", rc));
return rc; return rc;
} }
endpoint->lcl_psn_low = lrand48() & 0xffffff; endpoint->lcl_psn_low = lrand48() & 0xffffff;
BTL_DEBUG_OUT("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d", BTL_DEBUG(("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
endpoint->lcl_qp_high->qp_num, endpoint->lcl_qp_high->qp_num,
endpoint->lcl_qp_low->qp_num, endpoint->lcl_qp_low->qp_num,
openib_btl->ib_port_attr->lid); openib_btl->ib_port_attr->lid));
/* Set the remote side info */ /* Set the remote side info */
@ -405,13 +405,13 @@ static int mca_btl_openib_endpoint_reply_start_connect(mca_btl_openib_endpoint_t
rc = mca_btl_openib_endpoint_connect(endpoint); rc = mca_btl_openib_endpoint_connect(endpoint);
if(rc != OMPI_SUCCESS) { if(rc != OMPI_SUCCESS) {
BTL_ERROR("error in endpoint connect error code is %d", rc); BTL_ERROR(("error in endpoint connect error code is %d", rc));
return rc; return rc;
} }
/* Send connection info over to remote endpoint */ /* Send connection info over to remote endpoint */
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_send_connect_req(endpoint))) { if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_send_connect_req(endpoint))) {
BTL_ERROR("error in endpoint send connect request error code is %d", rc); BTL_ERROR(("error in endpoint send connect request error code is %d", rc));
return rc; return rc;
} }
return OMPI_SUCCESS; return OMPI_SUCCESS;
@ -476,7 +476,7 @@ static void mca_btl_openib_endpoint_recv(
* and then reply with our QP information */ * and then reply with our QP information */
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_reply_start_connect(ib_endpoint, buffer))) { if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_reply_start_connect(ib_endpoint, buffer))) {
BTL_ERROR("error in endpoint reply start connect"); BTL_ERROR(("error in endpoint reply start connect"));
break; break;
} }
@ -488,7 +488,7 @@ static void mca_btl_openib_endpoint_recv(
mca_btl_openib_endpoint_set_remote_info(ib_endpoint, buffer); mca_btl_openib_endpoint_set_remote_info(ib_endpoint, buffer);
if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_connect(ib_endpoint))) { if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_connect(ib_endpoint))) {
BTL_ERROR("endpoint connect error: %d", rc); BTL_ERROR(("endpoint connect error: %d", rc));
break; break;
} }
@ -508,7 +508,7 @@ static void mca_btl_openib_endpoint_recv(
break; break;
default : default :
BTL_ERROR("Invalid endpoint state %d", endpoint_state); BTL_ERROR(("Invalid endpoint state %d", endpoint_state));
} }
break; break;
@ -553,7 +553,7 @@ int mca_btl_openib_endpoint_send(
switch(endpoint->endpoint_state) { switch(endpoint->endpoint_state) {
case MCA_BTL_IB_CONNECTING: case MCA_BTL_IB_CONNECTING:
BTL_DEBUG_OUT("Queing because state is connecting"); BTL_DEBUG(("Queing because state is connecting"));
opal_list_append(&endpoint->pending_send_frags, opal_list_append(&endpoint->pending_send_frags,
(opal_list_item_t *)frag); (opal_list_item_t *)frag);
@ -563,7 +563,7 @@ int mca_btl_openib_endpoint_send(
case MCA_BTL_IB_CONNECT_ACK: case MCA_BTL_IB_CONNECT_ACK:
BTL_DEBUG_OUT("Queuing because waiting for ack"); BTL_DEBUG(("Queuing because waiting for ack"));
opal_list_append(&endpoint->pending_send_frags, opal_list_append(&endpoint->pending_send_frags,
(opal_list_item_t *)frag); (opal_list_item_t *)frag);
@ -573,7 +573,7 @@ int mca_btl_openib_endpoint_send(
case MCA_BTL_IB_CLOSED: case MCA_BTL_IB_CLOSED:
BTL_DEBUG_OUT("Connection to endpoint closed ... connecting ..."); BTL_DEBUG(("Connection to endpoint closed ... connecting ..."));
opal_list_append(&endpoint->pending_send_frags, opal_list_append(&endpoint->pending_send_frags,
(opal_list_item_t *)frag); (opal_list_item_t *)frag);
rc = mca_btl_openib_endpoint_start_connect(endpoint); rc = mca_btl_openib_endpoint_start_connect(endpoint);
@ -587,10 +587,10 @@ int mca_btl_openib_endpoint_send(
case MCA_BTL_IB_CONNECTED: case MCA_BTL_IB_CONNECTED:
{ {
openib_btl = endpoint->endpoint_btl; openib_btl = endpoint->endpoint_btl;
BTL_DEBUG_OUT("Send to : %d, len : %lu, frag : %llu", BTL_DEBUG(("Send to : %d, len : %lu, frag : %llu",
endpoint->endpoint_proc->proc_guid.vpid, endpoint->endpoint_proc->proc_guid.vpid,
frag->sg_entry.length, frag->sg_entry.length,
(unsigned long long) frag); (unsigned long long) frag));
rc = mca_btl_openib_endpoint_post_send(openib_btl, endpoint, frag); rc = mca_btl_openib_endpoint_post_send(openib_btl, endpoint, frag);
break; break;
} }
@ -628,7 +628,7 @@ void mca_btl_openib_progress_send_frags(mca_btl_openib_endpoint_t* endpoint)
/* We need to post this one */ /* We need to post this one */
if(OMPI_SUCCESS != mca_btl_openib_endpoint_post_send(openib_btl, endpoint, frag)) if(OMPI_SUCCESS != mca_btl_openib_endpoint_post_send(openib_btl, endpoint, frag))
BTL_ERROR("Error posting send"); BTL_ERROR(("Error posting send"));
} }
} }
@ -716,7 +716,7 @@ int mca_btl_openib_endpoint_create_qp(
my_qp = ibv_create_qp(pd, &qp_init_attr); my_qp = ibv_create_qp(pd, &qp_init_attr);
if(NULL == my_qp) { if(NULL == my_qp) {
BTL_ERROR("error creating qp errno says %s", strerror(errno)); BTL_ERROR(("error creating qp errno says %s", strerror(errno)));
return OMPI_ERROR; return OMPI_ERROR;
} }
(*qp) = my_qp; (*qp) = my_qp;
@ -735,7 +735,7 @@ int mca_btl_openib_endpoint_create_qp(
IBV_QP_PKEY_INDEX | IBV_QP_PKEY_INDEX |
IBV_QP_PORT | IBV_QP_PORT |
IBV_QP_ACCESS_FLAGS )) { IBV_QP_ACCESS_FLAGS )) {
BTL_ERROR("error modifying qp to INIT errno says %s", strerror(errno)); BTL_ERROR(("error modifying qp to INIT errno says %s", strerror(errno)));
return OMPI_ERROR; return OMPI_ERROR;
} }
} }
@ -781,7 +781,7 @@ int mca_btl_openib_endpoint_qp_init_query(
IBV_QP_RQ_PSN | IBV_QP_RQ_PSN |
IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MAX_DEST_RD_ATOMIC |
IBV_QP_MIN_RNR_TIMER)) { IBV_QP_MIN_RNR_TIMER)) {
BTL_ERROR("error modifing QP to RTR errno says %s", strerror(errno)); BTL_ERROR(("error modifing QP to RTR errno says %s", strerror(errno)));
return OMPI_ERROR; return OMPI_ERROR;
} }
attr->qp_state = IBV_QPS_RTS; attr->qp_state = IBV_QPS_RTS;
@ -797,7 +797,7 @@ int mca_btl_openib_endpoint_qp_init_query(
IBV_QP_RNR_RETRY | IBV_QP_RNR_RETRY |
IBV_QP_SQ_PSN | IBV_QP_SQ_PSN |
IBV_QP_MAX_QP_RD_ATOMIC)) { IBV_QP_MAX_QP_RD_ATOMIC)) {
BTL_ERROR("error modifying QP to RTS errno says %s", strerror(errno)); BTL_ERROR(("error modifying QP to RTS errno says %s", strerror(errno)));
return OMPI_ERROR; return OMPI_ERROR;
} }
return OMPI_SUCCESS; return OMPI_SUCCESS;

Просмотреть файл

@ -188,7 +188,7 @@ void mca_btl_openib_progress_send_frags(mca_btl_openib_endpoint_t*);
if(ibv_post_recv(post_rr_sub_qp, \ if(ibv_post_recv(post_rr_sub_qp, \
&post_rr_sub_frag->wr_desc.rr_desc, \ &post_rr_sub_frag->wr_desc.rr_desc, \
&post_rr_sub_bad_wr)) { \ &post_rr_sub_bad_wr)) { \
BTL_ERROR("error posting receive errno says %s\n", strerror(errno)); \ BTL_ERROR(("error posting receive errno says %s\n", strerror(errno))); \
return OMPI_ERROR; \ return OMPI_ERROR; \
}\ }\
}\ }\

56
ompi/mca/btl/tcp/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,56 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Use the top-level Makefile.options
include $(top_ompi_srcdir)/config/Makefile.options
# Source files that make up the TCP BTL component.
sources = \
btl_tcp.c \
btl_tcp.h \
btl_tcp_component.c \
btl_tcp_endpoint.c \
btl_tcp_endpoint.h \
btl_tcp_frag.c \
btl_tcp_frag.h \
btl_tcp_proc.c \
btl_tcp_proc.h \
btl_tcp_error.h
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
# The OMPI_BUILD_btl_tcp_DSO conditional is set by configure; exactly one
# of the lib/component variable pairs is populated per build mode.
if OMPI_BUILD_btl_tcp_DSO
lib =
lib_sources =
component = mca_btl_tcp.la
component_sources = $(sources)
else
lib = libmca_btl_tcp.la
lib_sources = $(sources)
component =
component_sources =
endif
# Dynamically loadable component: installed under $(libdir)/openmpi.
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component)
mca_btl_tcp_la_SOURCES = $(component_sources)
mca_btl_tcp_la_LDFLAGS = -module -avoid-version
# Static build: convenience library linked into the base BTL framework.
noinst_LTLIBRARIES = $(lib)
libmca_btl_tcp_la_SOURCES = $(lib_sources)
libmca_btl_tcp_la_LDFLAGS = -module -avoid-version

426
ompi/mca/btl/tcp/btl_tcp.c Обычный файл
Просмотреть файл

@ -0,0 +1,426 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include "util/output.h"
#include "util/if.h"
#include "mca/pml/pml.h"
#include "mca/btl/btl.h"
#include "btl_tcp.h"
#include "btl_tcp_frag.h"
#include "btl_tcp_proc.h"
#include "btl_tcp_endpoint.h"
#include "datatype/convertor.h"
#include "mca/mpool/base/base.h"
#include "mca/mpool/mpool.h"
/*
 * Template instance for the TCP BTL module.  The component copies this
 * structure for each module it creates; the zeroed size/latency/bandwidth
 * fields are filled in from MCA parameters during component init.
 * The second half of the base struct is the module's function table.
 */
mca_btl_tcp_module_t mca_btl_tcp_module = {
{
&mca_btl_tcp_component.super,
0, /* max size of first fragment */
0, /* min send fragment size */
0, /* max send fragment size */
0, /* min rdma fragment size */
0, /* max rdma fragment size */
0, /* exclusivity */
0, /* latency */
0, /* bandwidth */
0, /* flags */
/* module entry points (see btl.h for the interface contract) */
mca_btl_tcp_add_procs,
mca_btl_tcp_del_procs,
mca_btl_tcp_register,
mca_btl_tcp_finalize,
mca_btl_tcp_alloc,
mca_btl_tcp_free,
mca_btl_tcp_prepare_src,
mca_btl_tcp_prepare_dst,
mca_btl_tcp_send,
mca_btl_tcp_put,
NULL /* get */
}
};
/**
*
*/
/*
 * Bind this BTL module to a set of peer processes.
 *
 * For each peer an endpoint is created and cached on the shared
 * per-process mca_btl_tcp_proc_t; peers successfully bound have their
 * bit set in the `reachable` bitmap and their endpoint returned in
 * `peers[i]`.
 *
 * @param btl        (IN)  this module
 * @param nprocs     (IN)  number of entries in ompi_procs/peers
 * @param ompi_procs (IN)  processes to (try to) reach
 * @param peers      (OUT) endpoint for each reachable process
 * @param reachable  (OUT) bitmap of indices this BTL can reach
 * @return OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE on allocation failure
 */
int mca_btl_tcp_add_procs(
    struct mca_btl_base_module_t* btl,
    size_t nprocs,
    struct ompi_proc_t **ompi_procs,
    struct mca_btl_base_endpoint_t** peers,
    ompi_bitmap_t* reachable)
{
    mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*)btl;
    int i, rc;

    for(i = 0; i < (int) nprocs; i++) {

        struct ompi_proc_t* ompi_proc = ompi_procs[i];
        mca_btl_tcp_proc_t* tcp_proc;
        mca_btl_base_endpoint_t* tcp_endpoint;

        if(NULL == (tcp_proc = mca_btl_tcp_proc_create(ompi_proc))) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        /*
         * Check to make sure that the peer has at least as many interface
         * addresses exported as we are trying to use. If not, then
         * don't bind this BTL instance to the proc.
         */
        OPAL_THREAD_LOCK(&tcp_proc->proc_lock);

        /* The btl_proc datastructure is shared by all TCP BTL
         * instances that are trying to reach this destination.
         * Cache the peer instance on the btl_proc.
         */
        tcp_endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t);
        if(NULL == tcp_endpoint) {
            /* fixed: was unlocking &module_proc->proc_lock, a name that is
             * never declared here -- the lock held is tcp_proc->proc_lock */
            OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        tcp_endpoint->endpoint_btl = tcp_btl;
        rc = mca_btl_tcp_proc_insert(tcp_proc, tcp_endpoint);
        if(rc != OMPI_SUCCESS) {
            /* peer not reachable via this module: drop the endpoint but
             * keep going -- other peers may still be reachable */
            OBJ_RELEASE(tcp_endpoint);
            OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
            continue;
        }

        ompi_bitmap_set_bit(reachable, i);
        OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
        peers[i] = tcp_endpoint;
        opal_list_append(&tcp_btl->tcp_endpoints, (opal_list_item_t*)tcp_endpoint);

        /* we increase the count of MPI users of the event library
           once per peer, so that we are used until we aren't
           connected to a peer */
        opal_progress_event_increment();
    }

    return OMPI_SUCCESS;
}
/**
 * PML->BTL notification that procs are being removed.
 *
 * @param btl (IN)    BTL module
 * @param nprocs (IN) Number of processes being removed
 * @param procs (IN)  Set of processes
 * @param peers (IN)  Endpoints associated with the procs
 * @return OMPI_SUCCESS (teardown is not implemented yet; endpoints
 *         created in add_procs are only released in finalize)
 */
int mca_btl_tcp_del_procs(struct mca_btl_base_module_t* btl, 
        size_t nprocs, 
        struct ompi_proc_t **procs, 
        struct mca_btl_base_endpoint_t ** peers)
{
    /* TODO: release the per-proc endpoints and decrement the progress
     * event count taken in add_procs */
    return OMPI_SUCCESS;
}
/**
 * Register a callback to be invoked on receipt of fragments carrying
 * the given tag. Overwrites any callback previously registered for
 * that tag.
 *
 * @param btl (IN)     BTL module
 * @param tag (IN)     Tag to dispatch on
 * @param cbfunc (IN)  Callback function
 * @param cbdata (IN)  Opaque context passed back to the callback
 */
int mca_btl_tcp_register(
    struct mca_btl_base_module_t* btl, 
    mca_btl_base_tag_t tag, 
    mca_btl_base_module_recv_cb_fn_t cbfunc, 
    void* cbdata)
{
    mca_btl_tcp_module_t* tcp_module = (mca_btl_tcp_module_t*) btl;
    mca_btl_base_recv_reg_t* reg = &tcp_module->tcp_reg[tag];

    reg->cbfunc = cbfunc;
    reg->cbdata = cbdata;
    return OMPI_SUCCESS;
}
/**
 * Allocate a descriptor with a single segment of (up to) the
 * requested size. Requests at or under the eager limit are served
 * from the eager pool; larger requests from the max pool, capped at
 * btl_max_send_size.
 *
 * @param btl (IN)  BTL module
 * @param size (IN) Requested segment size
 * @return descriptor, or NULL if the free list is exhausted
 */
mca_btl_base_descriptor_t* mca_btl_tcp_alloc(
    struct mca_btl_base_module_t* btl,
    size_t size)
{
    mca_btl_tcp_frag_t* frag;
    int rc;
    
    if(size <= btl->btl_eager_limit) { 
        MCA_BTL_TCP_FRAG_ALLOC_EAGER(frag, rc); 
        /* BUG FIX: frag was dereferenced without a NULL check */
        if(NULL == frag) {
            return NULL;
        }
        /* the request is known to fit in an eager fragment; the
         * original redundantly re-tested size against the eager limit */
        frag->segments[0].seg_len = size;
    } else { 
        MCA_BTL_TCP_FRAG_ALLOC_MAX(frag, rc); 
        if(NULL == frag) {
            return NULL;
        }
        frag->segments[0].seg_len = 
            size <= btl->btl_max_send_size ? size : btl->btl_max_send_size ; 
    }
    
    /* payload lives immediately after the fragment header */
    frag->segments[0].seg_addr.pval = frag+1;

    frag->base.des_src = frag->segments;
    frag->base.des_src_cnt = 1;
    frag->base.des_dst = NULL;
    frag->base.des_dst_cnt = 0;
    frag->base.des_flags = 0; 
    frag->btl = (mca_btl_tcp_module_t*)btl;
    return (mca_btl_base_descriptor_t*)frag;
}
/**
 * Return a fragment to the free list it was drawn from, dispatching
 * on the size recorded in the fragment (0 = user pool, eager limit =
 * eager pool, max send size = max pool).
 *
 * @param btl (IN)  BTL module
 * @param des (IN)  Descriptor previously allocated by this BTL
 * @return OMPI_SUCCESS, or OMPI_ERR_BAD_PARAM for an unrecognized size
 */
int mca_btl_tcp_free(
    struct mca_btl_base_module_t* btl, 
    mca_btl_base_descriptor_t* des) 
{
    mca_btl_tcp_frag_t* frag = (mca_btl_tcp_frag_t*)des;

    if(0 == frag->size) {
        MCA_BTL_TCP_FRAG_RETURN_USER(frag);
        return OMPI_SUCCESS;
    }
    if(frag->size == btl->btl_eager_limit) {
        MCA_BTL_TCP_FRAG_RETURN_EAGER(frag);
        return OMPI_SUCCESS;
    }
    if(frag->size == btl->btl_max_send_size) {
        MCA_BTL_TCP_FRAG_RETURN_MAX(frag);
        return OMPI_SUCCESS;
    }
    return OMPI_ERR_BAD_PARAM;
}
/**
 * Pack data and return a descriptor that can be used for send/put.
 *
 * An eager fragment is used when reserve+data fits under the eager
 * limit, otherwise a max-send-size fragment. When the convertor
 * requires packing, the data is packed into the fragment following
 * the reserved header space (one segment); otherwise the second
 * segment points directly at the contiguous user buffer (two
 * segments).
 *
 * @param btl (IN)          BTL module
 * @param endpoint (IN)     BTL peer addressing
 * @param registration (IN) Memory registration (unused here)
 * @param convertor (IN)    Data type convertor
 * @param reserve (IN)      Header bytes to reserve ahead of the data
 * @param size (IN/OUT)     Bytes requested (IN) / actually prepared (OUT)
 */
mca_btl_base_descriptor_t* mca_btl_tcp_prepare_src(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    struct mca_mpool_base_registration_t* registration,
    struct ompi_convertor_t* convertor,
    size_t reserve,
    size_t* size)
{
    mca_btl_tcp_frag_t* frag;
    struct iovec iov;
    uint32_t iov_count = 1;
    size_t max_data = *size;
    int32_t free_after;
    int rc;

    /*
     * if we aren't pinning the data and the requested size is less
     * than the eager limit pack into a fragment from the eager pool
     */
    if (max_data+reserve <= btl->btl_eager_limit) {
        MCA_BTL_TCP_FRAG_ALLOC_EAGER(frag, rc);
    } 
    /* 
     * otherwise pack as much data as we can into a fragment 
     * that is the max send size.
     */
    else {
        MCA_BTL_TCP_FRAG_ALLOC_MAX(frag, rc);
    }
    if(NULL == frag) {
        return NULL;
    }

    if(ompi_convertor_need_buffers(convertor)) {

        /* non-contiguous data: pack into the fragment's buffer after
         * the reserved header space */
        if (max_data + reserve > frag->size) {
            max_data = frag->size - reserve;
        }
        iov.iov_len = max_data;
        iov.iov_base = (void*)(((unsigned char*)(frag+1)) + reserve);

        rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data, &free_after);
        *size = max_data;
        if( rc < 0 ) {
            /* BUG FIX: the fragment was always returned to the eager
             * pool, corrupting the free lists when it came from the
             * max pool; dispatch on frag->size as mca_btl_tcp_free does */
            if(frag->size == btl->btl_eager_limit) {
                MCA_BTL_TCP_FRAG_RETURN_EAGER(frag);
            } else {
                MCA_BTL_TCP_FRAG_RETURN_MAX(frag);
            }
            return NULL;
        }
        frag->segments[0].seg_addr.pval = iov.iov_base;
        frag->segments[0].seg_len = max_data + reserve;
        frag->base.des_src_cnt = 1;

    } else {

        /* contiguous data: first segment carries the reserved header,
         * second segment points directly at the user buffer (the
         * convertor fills in iov_base when iov_base is NULL) */
        iov.iov_len = max_data;
        iov.iov_base = NULL;

        rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data, &free_after);
        *size = max_data;
        if( rc < 0 ) {
            /* BUG FIX: see above - return to the pool of origin */
            if(frag->size == btl->btl_eager_limit) {
                MCA_BTL_TCP_FRAG_RETURN_EAGER(frag);
            } else {
                MCA_BTL_TCP_FRAG_RETURN_MAX(frag);
            }
            return NULL;
        }
        frag->segments[0].seg_addr.pval = frag+1;
        frag->segments[0].seg_len = reserve;
        frag->segments[1].seg_addr.pval = iov.iov_base;
        frag->segments[1].seg_len = max_data;
        frag->base.des_src_cnt = 2;
    }

    frag->base.des_src = frag->segments;
    frag->base.des_dst = NULL;
    frag->base.des_dst_cnt = 0;
    frag->base.des_flags = 0;
    return &frag->base;
}
/**
 * Prepare a descriptor describing the destination buffer for an
 * incoming put. The single destination segment points directly at the
 * user memory tracked by the convertor (contiguous data assumed by
 * this pointer arithmetic - no packing is performed).
 *
 * @param btl (IN)          BTL module
 * @param endpoint (IN)     BTL peer addressing
 * @param registration (IN) Memory registration (unused here)
 * @param convertor (IN)    Data type convertor
 * @param reserve (IN)      Unused for the destination side
 * @param size (IN/OUT)     Number of bytes to receive
 */
mca_btl_base_descriptor_t* mca_btl_tcp_prepare_dst(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    struct mca_mpool_base_registration_t* registration,
    struct ompi_convertor_t* convertor,
    size_t reserve,
    size_t* size)
{
    int rc;
    mca_btl_tcp_frag_t* frag;

    MCA_BTL_TCP_FRAG_ALLOC_USER(frag, rc);
    if(NULL == frag) {
        return NULL;
    }

    /* point the single segment at the user buffer, offset by what the
     * convertor has already consumed */
    frag->segments[0].seg_len = *size;
    frag->segments[0].seg_addr.pval = convertor->pBaseBuf + convertor->bConverted;

    frag->base.des_src = NULL;
    frag->base.des_src_cnt = 0;
    frag->base.des_dst = frag->segments;
    frag->base.des_dst_cnt = 1;
    frag->base.des_flags = 0;
    return &frag->base;
}
/**
 * Initiate an asynchronous send.
 *
 * Stamps the fragment header with the tag and SEND type, initializes
 * the source iovecs, and hands the fragment to the endpoint.
 *
 * @param btl (IN)         BTL module
 * @param endpoint (IN)    BTL addressing information
 * @param descriptor (IN)  Description of the data to be transferred
 * @param tag (IN)         Tag used to notify the peer
 */
int mca_btl_tcp_send( 
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    struct mca_btl_base_descriptor_t* descriptor, 
    mca_btl_base_tag_t tag)
{
    mca_btl_tcp_frag_t* frag = (mca_btl_tcp_frag_t*)descriptor;

    frag->btl = (mca_btl_tcp_module_t*) btl;
    frag->hdr.base.tag = tag;
    frag->hdr.type = MCA_BTL_TCP_HDR_TYPE_SEND;
    MCA_BTL_TCP_FRAG_INIT_SRC(frag, endpoint);
    return mca_btl_tcp_endpoint_send(endpoint, frag);
}
/**
 * Initiate an asynchronous put.
 *
 * @param btl (IN)         BTL module
 * @param endpoint (IN)    BTL addressing information
 * @param descriptor (IN)  Description of the data to be transferred
 * @return OMPI_ERR_NOT_IMPLEMENTED - RDMA-style put is not supported
 *         by this BTL yet; only the fragment's btl/endpoint fields
 *         are filled in.
 */
int mca_btl_tcp_put(
    mca_btl_base_module_t* btl,
    mca_btl_base_endpoint_t* endpoint,
    mca_btl_base_descriptor_t* descriptor)
{
    mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*) btl; 
    mca_btl_tcp_frag_t* frag = (mca_btl_tcp_frag_t*) descriptor; 
    frag->btl = tcp_btl;
    frag->endpoint = endpoint;
    /* TODO: emulate put over the endpoint's send path */
    return OMPI_ERR_NOT_IMPLEMENTED; 
}
/**
 * Initiate an asynchronous get.
 *
 * @param btl (IN)         BTL module
 * @param endpoint (IN)    BTL addressing information
 * @param descriptor (IN)  Description of the data to be transferred
 * @return OMPI_ERR_NOT_IMPLEMENTED - RDMA-style get is not supported
 *         by this BTL yet; only the fragment's btl/endpoint fields
 *         are filled in. (The module table also publishes NULL for
 *         the get entry point.)
 */
int mca_btl_tcp_get( 
    mca_btl_base_module_t* btl,
    mca_btl_base_endpoint_t* endpoint,
    mca_btl_base_descriptor_t* descriptor)
{
    mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*) btl; 
    mca_btl_tcp_frag_t* frag = (mca_btl_tcp_frag_t*) descriptor; 
    frag->btl = tcp_btl;
    frag->endpoint = endpoint;
    /* TODO: emulate get over the endpoint's send path */
    return OMPI_ERR_NOT_IMPLEMENTED; 
}
/*
 * Cleanup/release module resources: drop every endpoint created in
 * add_procs (releasing the matching event-library reference for each)
 * and free the module structure itself.
 */
int mca_btl_tcp_finalize(struct mca_btl_base_module_t* btl)
{
    mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*) btl; 
    opal_list_item_t* item;

    while(NULL != (item = opal_list_remove_first(&tcp_btl->tcp_endpoints))) {
        mca_btl_tcp_endpoint_t* endpoint = (mca_btl_tcp_endpoint_t*)item;
        OBJ_RELEASE(endpoint);
        /* one reference was taken per endpoint in add_procs */
        opal_progress_event_decrement();
    }
    free(tcp_btl);
    return OMPI_SUCCESS;
}

319
ompi/mca/btl/tcp/btl_tcp.h Обычный файл
Просмотреть файл

@ -0,0 +1,319 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_BTL_TCP_H
#define MCA_BTL_TCP_H
/* Standard system includes */
#include "ompi_config.h"
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
/* Open MPI includes */
#include "opal/event/event.h"
#include "opal/util/output.h"
#include "ompi/class/ompi_bitmap.h"
#include "ompi/class/ompi_free_list.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h"
#include "ompi/mca/mpool/mpool.h"
#include "ompi/mca/btl/btl.h"
#define MCA_BTL_TCP_STATISTICS 0
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
 * TCP BTL component.
 */
struct mca_btl_tcp_component_t {
    mca_btl_base_component_1_0_0_t super;   /**< base BTL component */ 
    uint32_t tcp_num_btls;                  /**< number of BTL modules (one per selected interface) */
    struct mca_btl_tcp_module_t **tcp_btls; /**< array of available BTL modules */
    struct mca_btl_tcp_proc_t* tcp_local;   /**< local proc struct */
    int tcp_free_list_num;                  /**< initial size of free lists */
    int tcp_free_list_max;                  /**< maximum size of free lists */
    int tcp_free_list_inc;                  /**< number of elements to alloc when growing free lists */
    opal_hash_table_t tcp_procs;            /**< hash table of tcp proc structures */
    opal_list_t tcp_events;                 /**< list of pending tcp events (accepts awaiting the peer guid) */
    opal_mutex_t tcp_lock;                  /**< lock for accessing component state */
    opal_event_t tcp_recv_event;            /**< recv event for listen socket */
    int tcp_listen_sd;                      /**< listen socket for incoming connection requests (-1 when closed) */
    unsigned short tcp_listen_port;         /**< listen port, as returned by getsockname() */
    char* tcp_if_include;                   /**< comma separated list of interfaces to include */
    char* tcp_if_exclude;                   /**< comma separated list of interfaces to exclude */
    int tcp_sndbuf;                         /**< socket sndbuf size */
    int tcp_rcvbuf;                         /**< socket rcvbuf size */

    /* free lists of fragment descriptors */
    ompi_free_list_t tcp_frag_eager;        /**< eager-limit sized fragments */
    ompi_free_list_t tcp_frag_max;          /**< max-send-size fragments */
    ompi_free_list_t tcp_frag_user;         /**< header-only fragments pointing at user buffers */
};
typedef struct mca_btl_tcp_component_t mca_btl_tcp_component_t;

extern mca_btl_tcp_component_t mca_btl_tcp_component;
/**
 * BTL Module Interface - one instance per selected network interface.
 */
struct mca_btl_tcp_module_t {
    mca_btl_base_module_t super;           /**< base BTL interface */
    mca_btl_base_recv_reg_t tcp_reg[256];  /**< receive callbacks, indexed by fragment tag */
    int                tcp_ifindex;        /**< BTL interface index */
    struct sockaddr_in tcp_ifaddr;         /**< BTL interface address */
    struct sockaddr_in tcp_ifmask;         /**< BTL interface netmask */
    opal_list_t        tcp_endpoints;      /**< endpoints created by add_procs, released in finalize */
#if MCA_BTL_TCP_STATISTICS
    size_t tcp_bytes_sent;                 /**< statistics: bytes sent */
    size_t tcp_bytes_recv;                 /**< statistics: bytes received */
    size_t tcp_send_handler;               /**< statistics: send handler invocations */
#endif
};
typedef struct mca_btl_tcp_module_t mca_btl_tcp_module_t;

extern mca_btl_tcp_module_t mca_btl_tcp_module;
/**
* Register TCP component parameters with the MCA framework
*/
extern int mca_btl_tcp_component_open(void);
/**
* Any final cleanup before being unloaded.
*/
extern int mca_btl_tcp_component_close(void);
/**
* TCP component initialization.
*
* @param num_btl_modules (OUT) Number of BTLs returned in BTL array.
 * @param allow_multi_user_threads (OUT) Flag indicating whether BTL supports user threads (TRUE)
 * @param have_hidden_threads (OUT) Flag indicating whether BTL uses threads (TRUE)
*/
extern mca_btl_base_module_t** mca_btl_tcp_component_init(
int *num_btl_modules,
bool allow_multi_user_threads,
bool have_hidden_threads
);
/**
* TCP component control.
*/
int mca_btl_tcp_component_control(
int param,
void* value,
size_t size
);
/**
* TCP component progress.
*/
extern int mca_btl_tcp_component_progress(void);
/**
* Cleanup any resources held by the BTL.
*
* @param btl BTL instance.
* @return OMPI_SUCCESS or error status on failure.
*/
extern int mca_btl_tcp_finalize(
struct mca_btl_base_module_t* btl
);
/**
* PML->BTL notification of change in the process list.
*
* @param btl (IN)
* @param nprocs (IN) Number of processes
* @param procs (IN) Set of processes
 * @param peers (OUT)     Set of (optional) peer addressing info.
 * @param reachable (IN/OUT) Bitmap of processes that are reachable via this BTL.
* @return OMPI_SUCCESS or error status on failure.
*
*/
extern int mca_btl_tcp_add_procs(
struct mca_btl_base_module_t* btl,
size_t nprocs,
struct ompi_proc_t **procs,
struct mca_btl_base_endpoint_t** peers,
ompi_bitmap_t* reachable
);
/**
 * PML->BTL notification that processes are being removed.
*
* @param btl (IN) BTL instance
* @param nproc (IN) Number of processes.
* @param procs (IN) Set of processes.
* @param peers (IN) Set of peer data structures.
* @return Status indicating if cleanup was successful
*
*/
extern int mca_btl_tcp_del_procs(
struct mca_btl_base_module_t* btl,
size_t nprocs,
struct ompi_proc_t **procs,
struct mca_btl_base_endpoint_t** peers
);
/**
* Initiate an asynchronous send.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
 * @param descriptor (IN)  Description of the data to be transferred
* @param tag (IN) The tag value used to notify the peer.
*/
extern int mca_btl_tcp_send(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* descriptor,
mca_btl_base_tag_t tag
);
/**
* Initiate an asynchronous put.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
*/
extern int mca_btl_tcp_put(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* decriptor
);
/**
* Initiate an asynchronous get.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
*/
extern int mca_btl_tcp_get(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* decriptor
);
/**
* Register a callback function that is called on receipt
* of a fragment.
*
* @param btl (IN) BTL module
* @return Status indicating if registration was successful
*
*/
extern int mca_btl_tcp_register(
struct mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_module_recv_cb_fn_t cbfunc,
void* cbdata);
/**
* Allocate a descriptor with a segment of the requested size.
* Note that the BTL layer may choose to return a smaller size
* if it cannot support the request.
*
* @param btl (IN) BTL module
* @param size (IN) Request segment size.
*/
extern mca_btl_base_descriptor_t* mca_btl_tcp_alloc(
struct mca_btl_base_module_t* btl,
size_t size);
/**
* Return a segment allocated by this BTL.
*
* @param btl (IN) BTL module
* @param descriptor (IN) Allocated descriptor.
*/
extern int mca_btl_tcp_free(
struct mca_btl_base_module_t* btl,
mca_btl_base_descriptor_t* des);
/**
* Prepare a descriptor for send/rdma using the supplied
 * convertor. If the convertor references data that is contiguous,
* the descriptor may simply point to the user buffer. Otherwise,
* this routine is responsible for allocating buffer space and
* packing if required.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL peer addressing
* @param convertor (IN) Data type convertor
* @param reserve (IN) Additional bytes requested by upper layer to precede user data
* @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT)
*/
mca_btl_base_descriptor_t* mca_btl_tcp_prepare_src(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* peer,
struct mca_mpool_base_registration_t*,
struct ompi_convertor_t* convertor,
size_t reserve,
size_t* size
);
extern mca_btl_base_descriptor_t* mca_btl_tcp_prepare_dst(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* peer,
struct mca_mpool_base_registration_t*,
struct ompi_convertor_t* convertor,
size_t reserve,
size_t* size);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

44
ompi/mca/btl/tcp/btl_tcp_addr.h Обычный файл
Просмотреть файл

@ -0,0 +1,44 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_BTL_TCP_ADDR_H
#define MCA_BTL_TCP_ADDR_H
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
/**
 * Structure used to publish TCP connection information to peers
 * (one entry per local BTL module, sent via the modex).
 */
struct mca_btl_tcp_addr_t {
    struct in_addr addr_inet;    /**< IPv4 address in network byte order */
    in_port_t addr_port;         /**< listen port */
    unsigned short addr_inuse;   /**< local meaning only - not interpreted by the peer */ 
};
typedef struct mca_btl_tcp_addr_t mca_btl_tcp_addr_t;
#endif

630
ompi/mca/btl/tcp/btl_tcp_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,630 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "include/ompi_socket_errno.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
#ifdef HAVE_ARPA_INET_H
#include <arpa/inet.h>
#endif
#include "include/constants.h"
#include "opal/event/event.h"
#include "opal/util/if.h"
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/ns/ns_types.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/btl/btl.h"
#include "mca/base/mca_base_param.h"
#include "mca/pml/base/pml_base_module_exchange.h"
#include "mca/errmgr/errmgr.h"
#include "mca/mpool/base/base.h"
#include "mca/btl/base/btl_base_error.h"
#include "btl_tcp.h"
#include "btl_tcp_addr.h"
#include "btl_tcp_proc.h"
#include "btl_tcp_frag.h"
#include "btl_tcp_endpoint.h"
#include "mca/btl/base/base.h"
#include "datatype/convertor.h"
#define IMPORTANT_WINDOWS_COMMENT() \
/* In windows, many of the socket functions return an EWOULDBLOCK instead of \
things like EAGAIN, EINPROGRESS, etc. It has been verified that this will \
not conflict with other error codes that are returned by these functions \
under UNIX/Linux environments */
/* The TCP component descriptor: MCA version/metadata plus the
 * open/close/init entry points used by the BTL framework. */
mca_btl_tcp_component_t mca_btl_tcp_component = {
    {
        /* First, the mca_base_component_t struct containing meta information
           about the component itself */

        {
            /* Indicate that we are a btl v1.0.0 component (which also implies a
               specific MCA version) */

            MCA_BTL_BASE_VERSION_1_0_0,

            "tcp", /* MCA component name */
            1,  /* MCA component major version */
            0,  /* MCA component minor version */
            0,  /* MCA component release version */
            mca_btl_tcp_component_open,  /* component open */
            mca_btl_tcp_component_close  /* component close */
        },

        /* Next the MCA v1.0.0 component meta data */

        {
            /* Whether the component is checkpointable or not */
            false
        },

        mca_btl_tcp_component_init,  
        NULL,  /* no component-level progress function; see module progress */
    }
};
/*
* utility routines for parameter registration
*/
/* Register a string parameter in the "btl_tcp" namespace (if not
 * already registered) and return its current value. */
static inline char* mca_btl_tcp_param_register_string(
        const char* param_name, 
        const char* default_value)
{
    char* value = NULL;
    int id = mca_base_param_register_string("btl", "tcp", param_name, NULL, default_value);

    mca_base_param_lookup_string(id, &value);
    return value;
}
/* Register an integer parameter in the "btl_tcp" namespace (if not
 * already registered) and return its current value. */
static inline int mca_btl_tcp_param_register_int(
        const char* param_name, 
        int default_value)
{
    int value = default_value;
    int id = mca_base_param_register_int("btl", "tcp", param_name, NULL, default_value);

    mca_base_param_lookup_int(id, &value);
    return value;
}
/*
 * Data structure for accepting connections. Each instance is tracked
 * on the component's tcp_events list (via its constructor/destructor)
 * so pending accepts can be cancelled at component close.
 */
struct mca_btl_tcp_event_t {
    opal_list_item_t item;   /* links the event onto tcp_events */
    opal_event_t event;      /* the underlying libevent handle */
};
typedef struct mca_btl_tcp_event_t mca_btl_tcp_event_t;

/* constructor: add the new event to the component's pending list */
static void mca_btl_tcp_event_construct(mca_btl_tcp_event_t* event)
{
    OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
    opal_list_append(&mca_btl_tcp_component.tcp_events, &event->item);
    OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
}

/* destructor: remove the event from the component's pending list */
static void mca_btl_tcp_event_destruct(mca_btl_tcp_event_t* event)
{
    OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
    opal_list_remove_item(&mca_btl_tcp_component.tcp_events, &event->item);
    OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
}

OBJ_CLASS_INSTANCE(
    mca_btl_tcp_event_t,
    opal_list_item_t,
    mca_btl_tcp_event_construct,
    mca_btl_tcp_event_destruct);
/*
* functions for receiving event callbacks
*/
static void mca_btl_tcp_component_recv_handler(int, short, void*);
/*
 *  Called by MCA framework to open the component: initializes
 *  component state and objects, then registers all of the component's
 *  MCA parameters (defaults are applied into mca_btl_tcp_module,
 *  which each created module copies).
 */
int mca_btl_tcp_component_open(void)
{
#ifdef WIN32
    WSADATA win_sock_data;
    if (WSAStartup(MAKEWORD(2,2), &win_sock_data) != 0) {
        BTL_ERROR(("failed to initialise windows sockets:%d", WSAGetLastError()));
        return OMPI_ERROR;
    }
#endif

    /* initialize state */
    mca_btl_tcp_component.tcp_listen_sd = -1;
    mca_btl_tcp_component.tcp_num_btls=0;
    mca_btl_tcp_component.tcp_btls=NULL;
    
    /* initialize objects */ 
    OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_procs, opal_hash_table_t);
    OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_events, opal_list_t);
    OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_eager, ompi_free_list_t);
    OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_max, ompi_free_list_t);
    OBJ_CONSTRUCT(&mca_btl_tcp_component.tcp_frag_user, ompi_free_list_t);
    opal_hash_table_init(&mca_btl_tcp_component.tcp_procs, 256);

    /* register TCP component parameters */
    mca_btl_tcp_component.tcp_if_include =
        mca_btl_tcp_param_register_string("if_include", "");
    mca_btl_tcp_component.tcp_if_exclude =
        mca_btl_tcp_param_register_string("if_exclude", "lo");
    mca_btl_tcp_component.tcp_free_list_num =
        mca_btl_tcp_param_register_int ("free_list_num", 8);
    mca_btl_tcp_component.tcp_free_list_max =
        mca_btl_tcp_param_register_int ("free_list_max", 1024);
    mca_btl_tcp_component.tcp_free_list_inc =
        mca_btl_tcp_param_register_int ("free_list_inc", 32);
    mca_btl_tcp_component.tcp_sndbuf =
        mca_btl_tcp_param_register_int ("sndbuf", 128*1024);
    mca_btl_tcp_component.tcp_rcvbuf =
        mca_btl_tcp_param_register_int ("rcvbuf", 128*1024);
    mca_btl_tcp_module.super.btl_exclusivity =
        mca_btl_tcp_param_register_int ("exclusivity", 0);
    /* the user-visible fragment size parameters include the BTL
     * header, so the header size is subtracted to get usable payload */
    mca_btl_tcp_module.super.btl_eager_limit =
        mca_btl_tcp_param_register_int ("first_frag_size", 64*1024) - sizeof(mca_btl_base_header_t);
    mca_btl_tcp_module.super.btl_min_send_size =
        mca_btl_tcp_param_register_int ("min_send_size", 64*1024) - sizeof(mca_btl_base_header_t);
    mca_btl_tcp_module.super.btl_max_send_size =
        mca_btl_tcp_param_register_int ("max_send_size", 256*1024) - sizeof(mca_btl_base_header_t);
    mca_btl_tcp_module.super.btl_min_rdma_size = 
        mca_btl_tcp_param_register_int("min_rdma_size", 1024*1024); 
    mca_btl_tcp_module.super.btl_max_rdma_size = 
        mca_btl_tcp_param_register_int("max_rdma_size", 2*1024*1024); 
    mca_btl_tcp_module.super.btl_flags  = 
        mca_btl_tcp_param_register_int("flags", 0); 
    return OMPI_SUCCESS;
}
/*
 * Component cleanup: close the listen socket, cancel any pending
 * accept events, and destruct all component-owned objects.
 */
int mca_btl_tcp_component_close(void)
{
    opal_list_item_t* item;
#ifdef WIN32
    WSACleanup();
#endif

    if(NULL != mca_btl_tcp_component.tcp_if_include)
        free(mca_btl_tcp_component.tcp_if_include);
    if(NULL != mca_btl_tcp_component.tcp_if_exclude)
       free(mca_btl_tcp_component.tcp_if_exclude);
    if (NULL != mca_btl_tcp_component.tcp_btls)
        free(mca_btl_tcp_component.tcp_btls);
 
    /* NOTE(review): this assumes that whenever the listen socket is
     * open the recv event was also registered - verify on error paths */
    if (mca_btl_tcp_component.tcp_listen_sd >= 0) {
        opal_event_del(&mca_btl_tcp_component.tcp_recv_event);
        close(mca_btl_tcp_component.tcp_listen_sd);
        mca_btl_tcp_component.tcp_listen_sd = -1;
    }

    /* cleanup any pending events */
    OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
    for(item =  opal_list_remove_first(&mca_btl_tcp_component.tcp_events);
        item != NULL; 
        item =  opal_list_remove_first(&mca_btl_tcp_component.tcp_events)) {
        mca_btl_tcp_event_t* event = (mca_btl_tcp_event_t*)item;
        opal_event_del(&event->event);
        OBJ_RELEASE(event);
    }
    OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);

    /* release resources */
    OBJ_DESTRUCT(&mca_btl_tcp_component.tcp_procs);
    OBJ_DESTRUCT(&mca_btl_tcp_component.tcp_events);
    OBJ_DESTRUCT(&mca_btl_tcp_component.tcp_frag_eager);
    OBJ_DESTRUCT(&mca_btl_tcp_component.tcp_frag_max);
    OBJ_DESTRUCT(&mca_btl_tcp_component.tcp_frag_user);
    OBJ_DESTRUCT(&mca_btl_tcp_component.tcp_lock);
    return OMPI_SUCCESS;
}
/*
 * Create a BTL module instance for the given interface and append it
 * to the component's module array. Per-interface bandwidth/latency
 * MCA parameters override the module defaults.
 *
 * @param if_index (IN) kernel interface index
 * @param if_name (IN)  interface name (may come from user input)
 * @return OMPI_SUCCESS or OMPI_ERR_OUT_OF_RESOURCE
 */
static int mca_btl_tcp_create(int if_index, const char* if_name)
{
    struct mca_btl_tcp_module_t* btl = (struct mca_btl_tcp_module_t *)malloc(sizeof(mca_btl_tcp_module_t));
    char param[256];
    if(NULL == btl)
        return OMPI_ERR_OUT_OF_RESOURCE;
    memcpy(btl, &mca_btl_tcp_module, sizeof(mca_btl_tcp_module));
    OBJ_CONSTRUCT(&btl->tcp_endpoints, opal_list_t);
    mca_btl_tcp_component.tcp_btls[mca_btl_tcp_component.tcp_num_btls++] = btl;

    /* initialize the btl */
    btl->tcp_ifindex = if_index;
#if MCA_BTL_TCP_STATISTICS
    btl->tcp_bytes_recv = 0;
    btl->tcp_bytes_sent = 0;
    btl->tcp_send_handler = 0;
#endif

    opal_ifindextoaddr(if_index, (struct sockaddr*)&btl->tcp_ifaddr, sizeof(btl->tcp_ifaddr));
    opal_ifindextomask(if_index, (struct sockaddr*)&btl->tcp_ifmask, sizeof(btl->tcp_ifmask));

    /* allow user to specify interface bandwidth */
    /* BUG FIX: if_name may originate from the user-supplied if_include
     * list, so bound the formatted length (sprintf could overflow the
     * 256-byte buffer) */
    snprintf(param, sizeof(param), "bandwidth_%s", if_name);
    btl->super.btl_bandwidth = mca_btl_tcp_param_register_int(param, 0);

    /* allow user to override/specify latency ranking */
    snprintf(param, sizeof(param), "latency_%s", if_name);
    btl->super.btl_latency = mca_btl_tcp_param_register_int(param, 0);

#if 0 && OMPI_ENABLE_DEBUG
    BTL_OUTPUT(("interface: %s bandwidth %d latency %d",
        if_name, btl->super.btl_bandwidth, btl->super.btl_latency));
#endif
    return OMPI_SUCCESS;
}
/*
 * Create a TCP BTL instance for either:
 * (1) all interfaces specified by the user
 * (2) all available interfaces 
 * (3) all available interfaces except for those excluded by the user
 */
static int mca_btl_tcp_component_create_instances(void)
{
    int if_count = opal_ifcount();
    int if_index;
    char **include;
    char **exclude;
    char **argv;

    if(if_count <= 0)
        return OMPI_ERROR;

    /* allocate memory for btls */
    mca_btl_tcp_component.tcp_btls = (mca_btl_tcp_module_t **)malloc(if_count * sizeof(mca_btl_tcp_module_t*));
    if(NULL == mca_btl_tcp_component.tcp_btls)
        return OMPI_ERR_OUT_OF_RESOURCE;

    /* if the user specified an interface list - use these exclusively */
    argv = include = opal_argv_split(mca_btl_tcp_component.tcp_if_include,',');
    while(argv && *argv) {
        char* if_name = *argv;
        /* NOTE: shadows the outer if_index declared above */
        int if_index = opal_ifnametoindex(if_name);
        if(if_index < 0) {
            BTL_ERROR(("invalid interface \"%s\"", if_name));
        } else {
            mca_btl_tcp_create(if_index, if_name);
        }
        argv++;
    }
    opal_argv_free(include);

    /* an if_include list that produced at least one module wins;
     * the exclude list is never consulted in that case */
    if(mca_btl_tcp_component.tcp_num_btls)
        return OMPI_SUCCESS;

    /* if the interface list was not specified by the user, create 
     * a BTL for each interface that was not excluded.
    */
    exclude = opal_argv_split(mca_btl_tcp_component.tcp_if_exclude,',');
    for(if_index = opal_ifbegin(); if_index >= 0; if_index = opal_ifnext(if_index)) {
        char if_name[32];
        opal_ifindextoname(if_index, if_name, sizeof(if_name));

        /* check to see if this interface exists in the exclude list */
        /* NOTE(review): with a single interface the exclude list is
         * bypassed entirely - presumably so at least one module always
         * exists; confirm this is intended */
        if(opal_ifcount() > 1) {
            argv = exclude;
            while(argv && *argv) {
                if(strncmp(*argv,if_name,strlen(*argv)) == 0)
                    break;
                argv++;
            }
            /* if this interface was not found in the excluded list - create a BTL */
            if(argv == 0 || *argv == 0) {
                mca_btl_tcp_create(if_index, if_name);
            }
        } else {
            mca_btl_tcp_create(if_index, if_name);
        }
    }
    opal_argv_free(exclude);
    return OMPI_SUCCESS;
}
/*
 * Create a listen socket bound to all interfaces on a dynamically
 * assigned port, make it non-blocking, and register the accept
 * handler with the event library.
 *
 * @return OMPI_SUCCESS, or OMPI_ERROR (socket closed and
 *         tcp_listen_sd reset to -1 on any failure)
 */
static int mca_btl_tcp_component_create_listen(void)
{
    int flags;
    struct sockaddr_in inaddr;
    ompi_socklen_t addrlen;

    /* create a listen socket for incoming connections */
    mca_btl_tcp_component.tcp_listen_sd = socket(AF_INET, SOCK_STREAM, 0);
    if(mca_btl_tcp_component.tcp_listen_sd < 0) {
        BTL_ERROR(("socket() failed with errno=%d", ompi_socket_errno));
        return OMPI_ERROR;
    }
    mca_btl_tcp_set_socket_options(mca_btl_tcp_component.tcp_listen_sd);

    /* bind to all addresses and dynamically assigned port */
    memset(&inaddr, 0, sizeof(inaddr));
    inaddr.sin_family = AF_INET;
    inaddr.sin_addr.s_addr = INADDR_ANY;
    inaddr.sin_port = 0;

    if(bind(mca_btl_tcp_component.tcp_listen_sd, (struct sockaddr*)&inaddr, sizeof(inaddr)) < 0) {
        BTL_ERROR(("bind() failed with errno=%d", ompi_socket_errno));
        goto cleanup;
    }

    /* resolve system assigned port */
    addrlen = sizeof(struct sockaddr_in);
    if(getsockname(mca_btl_tcp_component.tcp_listen_sd, (struct sockaddr*)&inaddr, &addrlen) < 0) {
        BTL_ERROR(("getsockname() failed with errno=%d", ompi_socket_errno));
        goto cleanup;
    }
    mca_btl_tcp_component.tcp_listen_port = inaddr.sin_port;

    /* setup listen backlog to maximum allowed by kernel */
    if(listen(mca_btl_tcp_component.tcp_listen_sd, SOMAXCONN) < 0) {
        BTL_ERROR(("listen() failed with errno=%d", ompi_socket_errno));
        goto cleanup;
    }

    /* set socket up to be non-blocking, otherwise accept could block */
    if((flags = fcntl(mca_btl_tcp_component.tcp_listen_sd, F_GETFL, 0)) < 0) {
        BTL_ERROR(("fcntl(F_GETFL) failed with errno=%d", ompi_socket_errno));
        goto cleanup;
    }
    flags |= O_NONBLOCK;
    if(fcntl(mca_btl_tcp_component.tcp_listen_sd, F_SETFL, flags) < 0) {
        BTL_ERROR(("fcntl(F_SETFL) failed with errno=%d", ompi_socket_errno));
        goto cleanup;
    }

    /* register listen port */
    opal_event_set(
        &mca_btl_tcp_component.tcp_recv_event,
        mca_btl_tcp_component.tcp_listen_sd, 
        OPAL_EV_READ|OPAL_EV_PERSIST, 
        mca_btl_tcp_component_recv_handler, 
        0);
    opal_event_add(&mca_btl_tcp_component.tcp_recv_event,0);
    return OMPI_SUCCESS;

cleanup:
    /* BUG FIX: the socket was previously left open (and recorded) on
     * failure; closing and resetting the descriptor also keeps
     * component_close from deleting a recv event that was never
     * registered */
    close(mca_btl_tcp_component.tcp_listen_sd);
    mca_btl_tcp_component.tcp_listen_sd = -1;
    return OMPI_ERROR;
}
/*
* Register TCP module addressing information. The MCA framework
* will make this available to all peers.
*/
static int mca_btl_tcp_component_exchange(void)
{
int rc=0;
size_t i=0;
size_t size = mca_btl_tcp_component.tcp_num_btls * sizeof(mca_btl_tcp_addr_t);
if(mca_btl_tcp_component.tcp_num_btls != 0) {
mca_btl_tcp_addr_t *addrs = (mca_btl_tcp_addr_t *)malloc(size);
for(i=0; i<mca_btl_tcp_component.tcp_num_btls; i++) {
struct mca_btl_tcp_module_t* btl = mca_btl_tcp_component.tcp_btls[i];
addrs[i].addr_inet = btl->tcp_ifaddr.sin_addr;
addrs[i].addr_port = mca_btl_tcp_component.tcp_listen_port;
addrs[i].addr_inuse = 0;
}
rc = mca_base_modex_send(&mca_btl_tcp_component.super.btl_version, addrs, size);
free(addrs);
}
return rc;
}
/*
 *  TCP module initialization:
 *  (1) read interface list from kernel and compare against module parameters
 *      then create a BTL instance for selected interfaces
 *  (2) setup TCP listen socket for incoming connection attempts
 *  (3) register BTL parameters with the MCA
 */
mca_btl_base_module_t** mca_btl_tcp_component_init(int *num_btl_modules, 
                                                   bool enable_progress_threads,
                                                   bool enable_mpi_threads)
{
    mca_btl_base_module_t **btls;
    *num_btl_modules = 0;

    /* initialize free lists; eager/max elements carry their payload
     * buffer inline after the fragment structure */
    ompi_free_list_init( &mca_btl_tcp_component.tcp_frag_eager,
                         sizeof (mca_btl_tcp_frag_eager_t) + mca_btl_tcp_module.super.btl_eager_limit,
                         OBJ_CLASS (mca_btl_tcp_frag_eager_t),
                         mca_btl_tcp_component.tcp_free_list_num,
                         mca_btl_tcp_component.tcp_free_list_max,
                         mca_btl_tcp_component.tcp_free_list_inc,
                         NULL );
    
    ompi_free_list_init( &mca_btl_tcp_component.tcp_frag_max,
                         sizeof (mca_btl_tcp_frag_max_t) + mca_btl_tcp_module.super.btl_max_send_size,
                         OBJ_CLASS (mca_btl_tcp_frag_max_t),
                         mca_btl_tcp_component.tcp_free_list_num,
                         mca_btl_tcp_component.tcp_free_list_max,
                         mca_btl_tcp_component.tcp_free_list_inc,
                         NULL );
    
    ompi_free_list_init( &mca_btl_tcp_component.tcp_frag_user,
                         sizeof (mca_btl_tcp_frag_user_t),
                         OBJ_CLASS (mca_btl_tcp_frag_user_t),
                         mca_btl_tcp_component.tcp_free_list_num,
                         mca_btl_tcp_component.tcp_free_list_max,
                         mca_btl_tcp_component.tcp_free_list_inc,
                         NULL );
    
    /* create a BTL TCP module for selected interfaces */
    if(mca_btl_tcp_component_create_instances() != OMPI_SUCCESS)
        return 0;

    /* create a TCP listen socket for incoming connection attempts */
    if(mca_btl_tcp_component_create_listen() != OMPI_SUCCESS)
        return 0;

    /* publish TCP parameters with the MCA framework */
    if(mca_btl_tcp_component_exchange() != OMPI_SUCCESS)
        return 0;

    /* the caller receives a copy of the module pointer array */
    btls = (mca_btl_base_module_t **)malloc(mca_btl_tcp_component.tcp_num_btls * 
                  sizeof(mca_btl_base_module_t*));
    if(NULL == btls)
        return NULL;

    memcpy(btls, mca_btl_tcp_component.tcp_btls, mca_btl_tcp_component.tcp_num_btls*sizeof(mca_btl_tcp_module_t*));
    *num_btl_modules = mca_btl_tcp_component.tcp_num_btls;
    return btls;
}
/*
 *  TCP module control - no control operations are currently
 *  recognized; all parameters are accepted and ignored.
 */
int mca_btl_tcp_component_control(int param, void* value, size_t size)
{
    return OMPI_SUCCESS;
}
/*
 * Called by mca_btl_tcp_component_recv() when the TCP listen
 * socket has pending connection requests. Accept incoming
 * requests and queue for completion of the connection handshake.
 * Loops until the non-blocking accept() reports no more pending
 * connections.
 */
static void mca_btl_tcp_component_accept(void)
{
    while(true) {
        ompi_socklen_t addrlen = sizeof(struct sockaddr_in);
        struct sockaddr_in addr;
        mca_btl_tcp_event_t *event;
        int sd = accept(mca_btl_tcp_component.tcp_listen_sd, (struct sockaddr*)&addr, &addrlen);
        if(sd < 0) {
            IMPORTANT_WINDOWS_COMMENT();
            if(ompi_socket_errno == EINTR)
                continue;
            /* FIX: the original condition used ||, i.e.
             * (errno != EAGAIN || errno != EWOULDBLOCK), which is always
             * true, so the routine logged an error every time the
             * non-blocking accept() simply ran out of pending connections.
             * Only a real failure should be reported. */
            if(ompi_socket_errno != EAGAIN && ompi_socket_errno != EWOULDBLOCK)
                BTL_ERROR(("accept() failed with errno %d.", ompi_socket_errno));
            return;
        }
        mca_btl_tcp_set_socket_options(sd);

        /* wait for receipt of the peer's process identifier to complete
         * this connection */
        event = OBJ_NEW(mca_btl_tcp_event_t);
        opal_event_set(&event->event, sd, OPAL_EV_READ, mca_btl_tcp_component_recv_handler, event);
        opal_event_add(&event->event, 0);
    }
}
/*
 * Event callback when there is data available on the registered
 * socket to recv. For the listen socket this means new connection
 * requests; for a freshly accepted socket it means the peer's
 * process identifier is ready to be read.
 */
static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user)
{
    orte_process_name_t guid;
    struct sockaddr_in addr;
    int retval;
    mca_btl_tcp_proc_t* btl_proc;
    ompi_socklen_t addr_len = sizeof(addr);
    mca_btl_tcp_event_t *event = (mca_btl_tcp_event_t *)user;

    /* accept new connections on the listen socket */
    if(mca_btl_tcp_component.tcp_listen_sd == sd) {
        mca_btl_tcp_component_accept();
        return;
    }
    /* the one-shot handshake event has fired; release it */
    OBJ_RELEASE(event);

    /* recv the process identifier */
    retval = recv(sd, (char *)&guid, sizeof(guid), 0);
    if(retval != sizeof(guid)) {
        /* NOTE(review): a short read is treated like a failure and drops
         * the connection -- assumes the guid arrives in a single recv;
         * confirm this holds on all transports */
        close(sd);
        return;
    }

    /* now set socket up to be non-blocking */
    if((flags = fcntl(sd, F_GETFL, 0)) < 0) {
        BTL_ERROR(("fcntl(F_GETFL) failed with errno=%d", ompi_socket_errno));
    } else {
        flags |= O_NONBLOCK;
        if(fcntl(sd, F_SETFL, flags) < 0) {
            BTL_ERROR(("fcntl(F_SETFL) failed with errno=%d", ompi_socket_errno));
        }
    }

    /* lookup the corresponding process */
    btl_proc = mca_btl_tcp_proc_lookup(&guid);
    if(NULL == btl_proc) {
        BTL_ERROR(("errno=%d",errno));
        close(sd);
        return;
    }

    /* lookup peer address */
    if(getpeername(sd, (struct sockaddr*)&addr, &addr_len) != 0) {
        BTL_ERROR(("getpeername() failed with errno=%d", ompi_socket_errno));
        close(sd);
        return;
    }

    /* are there any existing peer instances willing to accept this connection */
    if(mca_btl_tcp_proc_accept(btl_proc, &addr, sd) == false) {
        close(sd);
        return;
    }
}

686
ompi/mca/btl/tcp/btl_tcp_endpoint.c Обычный файл
Просмотреть файл

@ -0,0 +1,686 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "include/ompi_socket_errno.h"
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
#ifdef HAVE_NETINET_TCP_H
#include <netinet/tcp.h>
#endif
#ifdef HAVE_ARPA_INET_H
#include <arpa/inet.h>
#endif
#include <sys/time.h>
#include <time.h>
#include "include/types.h"
#include "mca/ns/base/base.h"
#include "mca/oob/base/base.h"
#include "mca/rml/rml.h"
#include "mca/errmgr/errmgr.h"
#include "mca/btl/base/btl_base_error.h"
#include "dps/dps.h"
#include "btl_tcp.h"
#include "btl_tcp_endpoint.h"
#include "btl_tcp_proc.h"
#include "btl_tcp_frag.h"
#include "btl_tcp_addr.h"
/*
 * Initialize a freshly allocated endpoint to its idle, unconnected
 * defaults: no owning BTL/proc/address, no socket, no in-flight frags.
 */
static void mca_btl_tcp_endpoint_construct(mca_btl_tcp_endpoint_t* ep)
{
    ep->endpoint_btl = NULL;
    ep->endpoint_proc = NULL;
    ep->endpoint_addr = NULL;
    ep->endpoint_sd = -1;                       /* no socket yet */
    ep->endpoint_send_frag = NULL;
    ep->endpoint_recv_frag = NULL;
    ep->endpoint_send_event.ev_flags = 0;
    ep->endpoint_recv_event.ev_flags = 0;
    ep->endpoint_state = MCA_BTL_TCP_CLOSED;
    ep->endpoint_retries = 0;
    ep->endpoint_nbo = false;
    /* pending-send queue and its protecting locks */
    OBJ_CONSTRUCT(&ep->endpoint_frags, opal_list_t);
    OBJ_CONSTRUCT(&ep->endpoint_send_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&ep->endpoint_recv_lock, opal_mutex_t);
}
/*
 * Tear down an endpoint: detach it from its proc, close any open
 * socket/events, then destroy the queue and locks.
 */
static void mca_btl_tcp_endpoint_destruct(mca_btl_tcp_endpoint_t* ep)
{
    mca_btl_tcp_proc_remove(ep->endpoint_proc, ep);
    mca_btl_tcp_endpoint_close(ep);
    OBJ_DESTRUCT(&ep->endpoint_frags);
    OBJ_DESTRUCT(&ep->endpoint_send_lock);
    OBJ_DESTRUCT(&ep->endpoint_recv_lock);
}
/* register the endpoint class with the OPAL object system */
OBJ_CLASS_INSTANCE(
    mca_btl_tcp_endpoint_t,
    opal_list_item_t,
    mca_btl_tcp_endpoint_construct,
    mca_btl_tcp_endpoint_destruct);

/* expands to nothing -- exists purely to hang this explanation where
 * the EWOULDBLOCK checks appear */
#define IMPORTANT_WINDOWS_COMMENT() \
/* In windows, many of the socket functions return an EWOULDBLOCK instead of \
   things like EAGAIN, EINPROGRESS, etc. It has been verified that this will \
   not conflict with other error codes that are returned by these functions \
   under UNIX/Linux environments */

/* NOTE(review): construct/destruct are re-declared static here although
 * they are defined above -- normally these forward declarations precede
 * the definitions; confirm the intended file ordering */
static void mca_btl_tcp_endpoint_construct(mca_btl_base_endpoint_t* btl_endpoint);
static void mca_btl_tcp_endpoint_destruct(mca_btl_base_endpoint_t* btl_endpoint);
static int  mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t*);
static void mca_btl_tcp_endpoint_connected(mca_btl_base_endpoint_t*);
static void mca_btl_tcp_endpoint_recv_handler(int sd, short flags, void* user);
static void mca_btl_tcp_endpoint_send_handler(int sd, short flags, void* user);

/*
 * Diagnostics: change this to "1" to enable the function
 * mca_btl_tcp_endpoint_dump(), below
 */
#define WANT_PEER_DUMP 0
/*
 * diagnostics -- dump the local/remote addresses and the socket
 * options in effect on an endpoint's connection (compiled out
 * unless WANT_PEER_DUMP is set above)
 */
#if WANT_PEER_DUMP
static void mca_btl_tcp_endpoint_dump(mca_btl_base_endpoint_t* btl_endpoint, const char* msg)
{
    char src[64];
    char dst[64];
    int sndbuf,rcvbuf,nodelay,flags;
    struct sockaddr_in inaddr;
    ompi_socklen_t obtlen;
    ompi_socklen_t addrlen = sizeof(struct sockaddr_in);

    /* local and remote IPv4 addresses of the connection */
    getsockname(btl_endpoint->endpoint_sd, (struct sockaddr*)&inaddr, &addrlen);
    sprintf(src, "%s", inet_ntoa(inaddr.sin_addr));
    getpeername(btl_endpoint->endpoint_sd, (struct sockaddr*)&inaddr, &addrlen);
    sprintf(dst, "%s", inet_ntoa(inaddr.sin_addr));

    if((flags = fcntl(btl_endpoint->endpoint_sd, F_GETFL, 0)) < 0) {
        BTL_ERROR(("fcntl(F_GETFL) failed with errno=%d", ompi_socket_errno));
    }

    /* socket buffer sizes and Nagle state; -1/0 when the platform
     * does not expose the option */
#if defined(SO_SNDBUF)
    obtlen = sizeof(sndbuf);
    if(getsockopt(btl_endpoint->endpoint_sd, SOL_SOCKET, SO_SNDBUF, (char *)&sndbuf, &obtlen) < 0) {
        BTL_ERROR(("SO_SNDBUF option: errno %d", ompi_socket_errno));
    }
#else
    sndbuf = -1;
#endif
#if defined(SO_RCVBUF)
    obtlen = sizeof(rcvbuf);
    if(getsockopt(btl_endpoint->endpoint_sd, SOL_SOCKET, SO_RCVBUF, (char *)&rcvbuf, &obtlen) < 0) {
        BTL_ERROR(("SO_RCVBUF option: errno %d", ompi_socket_errno));
    }
#else
    rcvbuf = -1;
#endif
#if defined(TCP_NODELAY)
    obtlen = sizeof(nodelay);
    if(getsockopt(btl_endpoint->endpoint_sd, IPPROTO_TCP, TCP_NODELAY, (char *)&nodelay, &obtlen) < 0) {
        BTL_ERROR(("TCP_NODELAY option: errno %d", ompi_socket_errno));
    }
#else
    nodelay = 0;
#endif

    BTL_DEBUG(("%s: %s - %s nodelay %d sndbuf %d rcvbuf %d flags %08x",
        msg, src, dst, nodelay, sndbuf, rcvbuf, flags));
}
#endif
/*
 * Register the persistent read/write event callbacks for this
 * endpoint's socket with the event library.
 */
static inline void mca_btl_tcp_endpoint_event_init(mca_btl_base_endpoint_t* btl_endpoint, int sd)
{
    /* NOTE(review): the sd parameter is not used -- the endpoint's stored
     * descriptor is registered instead; callers pass endpoint_sd anyway */
    opal_event_set(&btl_endpoint->endpoint_recv_event,
                   btl_endpoint->endpoint_sd,
                   OPAL_EV_READ|OPAL_EV_PERSIST,
                   mca_btl_tcp_endpoint_recv_handler,
                   btl_endpoint);
    opal_event_set(&btl_endpoint->endpoint_send_event,
                   btl_endpoint->endpoint_sd,
                   OPAL_EV_WRITE|OPAL_EV_PERSIST,
                   mca_btl_tcp_endpoint_send_handler,
                   btl_endpoint);
}
/*
 * Attempt to send a fragment using a given endpoint. If the endpoint is not connected,
 * queue the fragment and start the connection as required.
 *
 * Returns OMPI_SUCCESS when the fragment was sent or queued,
 * OMPI_ERR_UNREACH / OMPI_ERROR when the endpoint is failed or shut down.
 */
int mca_btl_tcp_endpoint_send(mca_btl_base_endpoint_t* btl_endpoint, mca_btl_tcp_frag_t* frag)
{
    int rc = OMPI_SUCCESS;
    OPAL_THREAD_LOCK(&btl_endpoint->endpoint_send_lock);
    switch(btl_endpoint->endpoint_state) {
    case MCA_BTL_TCP_CONNECTING:
    case MCA_BTL_TCP_CONNECT_ACK:
    case MCA_BTL_TCP_CLOSED:
        /* connection not up yet: queue the frag, and start a connect
         * if none is already in flight */
        opal_list_append(&btl_endpoint->endpoint_frags, (opal_list_item_t*)frag);
        if(btl_endpoint->endpoint_state == MCA_BTL_TCP_CLOSED)
            rc = mca_btl_tcp_endpoint_start_connect(btl_endpoint);
        break;
    case MCA_BTL_TCP_FAILED:
        rc = OMPI_ERR_UNREACH;
        break;
    case MCA_BTL_TCP_CONNECTED:
        if (NULL != btl_endpoint->endpoint_send_frag) {
            /* a send is already in progress; queue to preserve ordering */
            opal_list_append(&btl_endpoint->endpoint_frags, (opal_list_item_t*)frag);
        } else {
            if(mca_btl_tcp_frag_send(frag, btl_endpoint->endpoint_sd)) {
                /* completed in one shot: drop the lock BEFORE the
                 * completion callback so a re-entrant send cannot
                 * deadlock on endpoint_send_lock */
                OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
                frag->base.des_cbfunc(&frag->btl->super, frag->endpoint, &frag->base, frag->rc);
                return OMPI_SUCCESS;
            } else {
                /* partial send: let the write event finish it */
                btl_endpoint->endpoint_send_frag = frag;
                opal_event_add(&btl_endpoint->endpoint_send_event, 0);
            }
        }
        break;
    case MCA_BTL_TCP_SHUTDOWN:
        rc = OMPI_ERROR;
        break;
    }
    OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
    return rc;
}
/*
 * A blocking send on a non-blocking socket. Used to push the small
 * handshake message (the local process identifier) to the peer.
 * Returns the byte count sent, or -1 (after closing the endpoint)
 * on a hard error.
 */
static int mca_btl_tcp_endpoint_send_blocking(mca_btl_base_endpoint_t* btl_endpoint, void* data, size_t size)
{
    unsigned char* buf = (unsigned char*)data;
    size_t bytes_sent = 0;
    while(bytes_sent < size) {
        int rc = send(btl_endpoint->endpoint_sd, (const char *)buf + bytes_sent, size - bytes_sent, 0);
        if(rc >= 0) {
            bytes_sent += rc;
            continue;
        }
        IMPORTANT_WINDOWS_COMMENT();
        /* spin on the transient conditions, fail hard on anything else */
        if(ompi_socket_errno == EINTR || ompi_socket_errno == EAGAIN ||
           ompi_socket_errno == EWOULDBLOCK)
            continue;
        BTL_ERROR(("send() failed with errno=%d",ompi_socket_errno));
        mca_btl_tcp_endpoint_close(btl_endpoint);
        return -1;
    }
    return bytes_sent;
}
/*
 * Send this process's globally unique identifier to the peer over a
 * newly connected socket. Returns OMPI_ERR_UNREACH if the full
 * identifier could not be delivered.
 */
static int mca_btl_tcp_endpoint_send_connect_ack(mca_btl_base_endpoint_t* btl_endpoint)
{
    mca_btl_tcp_proc_t* local_proc = mca_btl_tcp_proc_local();
    int sent = mca_btl_tcp_endpoint_send_blocking(btl_endpoint,
                                                  &local_proc->proc_name,
                                                  sizeof(local_proc->proc_name));
    if(sent != sizeof(local_proc->proc_name))
        return OMPI_ERR_UNREACH;
    return OMPI_SUCCESS;
}
/*
 * Check the state of this endpoint. If the incoming connection request matches
 * our endpoint's address, check the state of our connection:
 * (1) if a connection has not been attempted, accept the connection
 * (2) if a connection has not been established, and the peer's process
 *     identifier is less than the local process, accept the connection
 * otherwise, reject the connection and continue with the current connection
 * (this ordering rule breaks the tie when both sides connect simultaneously)
 */
bool mca_btl_tcp_endpoint_accept(mca_btl_base_endpoint_t* btl_endpoint, struct sockaddr_in* addr, int sd)
{
    mca_btl_tcp_addr_t* btl_addr;
    mca_btl_tcp_proc_t* this_proc = mca_btl_tcp_proc_local();
    orte_ns_cmp_bitmask_t mask = ORTE_NS_CMP_ALL;
    int cmpval;

    /* both locks are held while arbitrating; always recv before send
     * (the unlocks below mirror this order in reverse) */
    OPAL_THREAD_LOCK(&btl_endpoint->endpoint_recv_lock);
    OPAL_THREAD_LOCK(&btl_endpoint->endpoint_send_lock);
    if((btl_addr = btl_endpoint->endpoint_addr) != NULL &&
        btl_addr->addr_inet.s_addr == addr->sin_addr.s_addr) {
        mca_btl_tcp_proc_t *endpoint_proc = btl_endpoint->endpoint_proc;
        cmpval = orte_ns.compare(mask,
                                 &endpoint_proc->proc_ompi->proc_name,
                                 &this_proc->proc_ompi->proc_name);
        /* accept when we have no socket, or when our outbound attempt has
         * not completed and the peer outranks us in the tie-break */
        if((btl_endpoint->endpoint_sd < 0) ||
           (btl_endpoint->endpoint_state != MCA_BTL_TCP_CONNECTED &&
            cmpval < 0)) {
            /* discard any half-open outbound connection first */
            mca_btl_tcp_endpoint_close(btl_endpoint);
            btl_endpoint->endpoint_sd = sd;
            if(mca_btl_tcp_endpoint_send_connect_ack(btl_endpoint) != OMPI_SUCCESS) {
                mca_btl_tcp_endpoint_close(btl_endpoint);
                OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
                OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
                return false;
            }
            mca_btl_tcp_endpoint_event_init(btl_endpoint, sd);
            opal_event_add(&btl_endpoint->endpoint_recv_event, 0);
            mca_btl_tcp_endpoint_connected(btl_endpoint);
#if OMPI_ENABLE_DEBUG && WANT_PEER_DUMP
            mca_btl_tcp_endpoint_dump(btl_endpoint, "accepted");
#endif
            OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
            OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
            return true;
        }
    }
    OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
    OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
    return false;
}
/*
 * Remove any event registrations associated with the socket
 * and update the endpoint state to reflect the connection has
 * been closed. Also bumps the retry counter so repeated failures
 * can be detected.
 */
void mca_btl_tcp_endpoint_close(mca_btl_base_endpoint_t* btl_endpoint)
{
    const int sd = btl_endpoint->endpoint_sd;
    if(sd >= 0) {
        /* unhook the event callbacks before the descriptor goes away */
        opal_event_del(&btl_endpoint->endpoint_recv_event);
        opal_event_del(&btl_endpoint->endpoint_send_event);
        close(sd);
        btl_endpoint->endpoint_sd = -1;
    }
    btl_endpoint->endpoint_state = MCA_BTL_TCP_CLOSED;
    btl_endpoint->endpoint_retries++;
}
/* Permanently shut the endpoint down: close the connection and move to
 * the SHUTDOWN state so later sends fail instead of reconnecting.
 * Lock order (recv then send, released in reverse) must match
 * mca_btl_tcp_endpoint_accept(). */
void mca_btl_tcp_endpoint_shutdown(mca_btl_base_endpoint_t* btl_endpoint)
{
    OPAL_THREAD_LOCK(&btl_endpoint->endpoint_recv_lock);
    OPAL_THREAD_LOCK(&btl_endpoint->endpoint_send_lock);
    mca_btl_tcp_endpoint_close(btl_endpoint);
    /* overrides the CLOSED state set by close() */
    btl_endpoint->endpoint_state = MCA_BTL_TCP_SHUTDOWN;
    OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
    OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
}
/*
 * Setup endpoint state to reflect that connection has been established,
 * and start any sends that were queued while the connection was
 * being brought up.
 */
static void mca_btl_tcp_endpoint_connected(mca_btl_base_endpoint_t* btl_endpoint)
{
    btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECTED;
    btl_endpoint->endpoint_retries = 0;

    /* nothing queued -- nothing to kick off */
    if(opal_list_get_size(&btl_endpoint->endpoint_frags) == 0)
        return;

    if(NULL == btl_endpoint->endpoint_send_frag) {
        btl_endpoint->endpoint_send_frag = (mca_btl_tcp_frag_t*)
            opal_list_remove_first(&btl_endpoint->endpoint_frags);
    }
    opal_event_add(&btl_endpoint->endpoint_send_event, 0);
}
/*
 * A blocking recv on a non-blocking socket. Used to pull the small
 * handshake message (the peer's process identifier) off the wire.
 * Returns the byte count received, or -1 (after closing the endpoint)
 * when the peer disconnects or a hard error occurs.
 */
static int mca_btl_tcp_endpoint_recv_blocking(mca_btl_base_endpoint_t* btl_endpoint, void* data, size_t size)
{
    unsigned char* buf = (unsigned char*)data;
    size_t received = 0;
    while(received < size) {
        int rc = recv(btl_endpoint->endpoint_sd, (char *)buf + received, size - received, 0);

        /* zero bytes: the peer closed the connection mid-handshake */
        if(rc == 0) {
            mca_btl_tcp_endpoint_close(btl_endpoint);
            return -1;
        }
        if(rc > 0) {
            received += rc;
            continue;
        }

        IMPORTANT_WINDOWS_COMMENT();
        /* spin on transient conditions, fail hard on anything else */
        if(ompi_socket_errno == EINTR || ompi_socket_errno == EAGAIN ||
           ompi_socket_errno == EWOULDBLOCK)
            continue;
        BTL_ERROR(("recv() failed with errno=%d",ompi_socket_errno));
        mca_btl_tcp_endpoint_close(btl_endpoint);
        return -1;
    }
    return received;
}
/*
 * Receive the peer's globally unique process identification from a newly
 * connected socket and verify it is the process we expected. On a match
 * the endpoint transitions to the connected state.
 */
static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_endpoint)
{
    orte_process_name_t guid;
    mca_btl_tcp_proc_t* btl_proc = btl_endpoint->endpoint_proc;
    int nbytes = mca_btl_tcp_endpoint_recv_blocking(btl_endpoint, &guid,
                                                    sizeof(orte_process_name_t));
    if(nbytes != sizeof(orte_process_name_t))
        return OMPI_ERR_UNREACH;

    /* the identifier on the wire must match the proc this endpoint targets */
    if(0 != memcmp(&btl_proc->proc_name, &guid, sizeof(orte_process_name_t))) {
        BTL_ERROR(("received unexpected process identifier [%lu,%lu,%lu]",
            ORTE_NAME_ARGS(&guid)));
        mca_btl_tcp_endpoint_close(btl_endpoint);
        return OMPI_ERR_UNREACH;
    }

    /* connected */
    mca_btl_tcp_endpoint_connected(btl_endpoint);
#if OMPI_ENABLE_DEBUG && WANT_PEER_DUMP
    mca_btl_tcp_endpoint_dump(btl_endpoint, "connected");
#endif
    return OMPI_SUCCESS;
}
/* Apply the component's socket tuning to a descriptor: disable Nagle
 * and set the configured send/receive buffer sizes (a size of 0 keeps
 * the system default). Failures are logged but not fatal. */
void mca_btl_tcp_set_socket_options(int sd)
{
#if defined(TCP_NODELAY)
    int optval = 1;
    if(setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *)&optval, sizeof(optval)) < 0) {
        BTL_ERROR(("setsockopt(TCP_NODELAY) failed with errno=%d", ompi_socket_errno));
    }
#endif
#if defined(SO_SNDBUF)
    if(mca_btl_tcp_component.tcp_sndbuf > 0 &&
       setsockopt(sd, SOL_SOCKET, SO_SNDBUF, (char *)&mca_btl_tcp_component.tcp_sndbuf, sizeof(int)) < 0) {
        BTL_ERROR(("setsockopt(SO_SNDBUF) failed with errno %d", ompi_socket_errno));
    }
#endif
#if defined(SO_RCVBUF)
    if(mca_btl_tcp_component.tcp_rcvbuf > 0 &&
       setsockopt(sd, SOL_SOCKET, SO_RCVBUF, (char *)&mca_btl_tcp_component.tcp_rcvbuf, sizeof(int)) < 0) {
        BTL_ERROR(("setsockopt(SO_RCVBUF) failed with errno %d", ompi_socket_errno));
    }
#endif
}
/*
 * Start a connection to the endpoint. This will likely not complete,
 * as the socket is set to non-blocking, so register for event
 * notification of connect completion. On connection we send
 * our globally unique process identifier to the endpoint and wait for
 * the endpoint's response.
 *
 * Returns OMPI_SUCCESS when connected or when the connect is pending;
 * OMPI_ERR_UNREACH on immediate failure (retry counter is bumped).
 */
static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endpoint)
{
    int rc,flags;
    struct sockaddr_in endpoint_addr;

    btl_endpoint->endpoint_sd = socket(AF_INET, SOCK_STREAM, 0);
    if (btl_endpoint->endpoint_sd < 0) {
        btl_endpoint->endpoint_retries++;
        return OMPI_ERR_UNREACH;
    }

    /* setup socket buffer sizes */
    mca_btl_tcp_set_socket_options(btl_endpoint->endpoint_sd);

    /* setup event callbacks */
    mca_btl_tcp_endpoint_event_init(btl_endpoint, btl_endpoint->endpoint_sd);

    /* setup the socket as non-blocking */
    if((flags = fcntl(btl_endpoint->endpoint_sd, F_GETFL, 0)) < 0) {
        BTL_ERROR(("fcntl(F_GETFL) failed with errno=%d", ompi_socket_errno));
    } else {
        flags |= O_NONBLOCK;
        if(fcntl(btl_endpoint->endpoint_sd, F_SETFL, flags) < 0)
            BTL_ERROR(("fcntl(F_SETFL) failed with errno=%d", ompi_socket_errno));
    }

    /* start the connect - will likely fail with EINPROGRESS */
    endpoint_addr.sin_family = AF_INET;
    endpoint_addr.sin_addr = btl_endpoint->endpoint_addr->addr_inet;
    endpoint_addr.sin_port = btl_endpoint->endpoint_addr->addr_port;
    if(connect(btl_endpoint->endpoint_sd, (struct sockaddr*)&endpoint_addr, sizeof(endpoint_addr)) < 0) {
        /* non-blocking so wait for completion */
        IMPORTANT_WINDOWS_COMMENT();
        if(ompi_socket_errno == EINPROGRESS || ompi_socket_errno == EWOULDBLOCK) {
            /* the write event fires when the connect resolves; see
             * mca_btl_tcp_endpoint_complete_connect() */
            btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECTING;
            opal_event_add(&btl_endpoint->endpoint_send_event, 0);
            return OMPI_SUCCESS;
        }
        mca_btl_tcp_endpoint_close(btl_endpoint);
        btl_endpoint->endpoint_retries++;
        return OMPI_ERR_UNREACH;
    }

    /* connect() completed synchronously: send our globally unique
     * process identifier to the endpoint and wait for its ack */
    if((rc = mca_btl_tcp_endpoint_send_connect_ack(btl_endpoint)) == OMPI_SUCCESS) {
        btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECT_ACK;
        opal_event_add(&btl_endpoint->endpoint_recv_event, 0);
    } else {
        mca_btl_tcp_endpoint_close(btl_endpoint);
    }
    return rc;
}
/*
 * Check the status of the connection. If the connection failed, will retry
 * later. Otherwise, send this process's identifier to the endpoint on the
 * newly connected socket. Invoked from the write-event handler once a
 * non-blocking connect() resolves.
 */
static void mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_endpoint)
{
    int so_error = 0;
    ompi_socklen_t so_length = sizeof(so_error);

    /* unregister from receiving event notifications */
    opal_event_del(&btl_endpoint->endpoint_send_event);

    /* check connect completion status via SO_ERROR */
    if(getsockopt(btl_endpoint->endpoint_sd, SOL_SOCKET, SO_ERROR, (char *)&so_error, &so_length) < 0) {
        BTL_ERROR(("getsockopt() failed with errno=%d", ompi_socket_errno));
        mca_btl_tcp_endpoint_close(btl_endpoint);
        return;
    }
    IMPORTANT_WINDOWS_COMMENT();
    if(so_error == EINPROGRESS || so_error == EWOULDBLOCK) {
        /* still connecting: re-arm the write event and wait again */
        opal_event_add(&btl_endpoint->endpoint_send_event, 0);
        return;
    }
    if(so_error != 0) {
        BTL_ERROR(("connect() failed with errno=%d", so_error));
        mca_btl_tcp_endpoint_close(btl_endpoint);
        return;
    }

    /* connected: run the identifier handshake */
    if(mca_btl_tcp_endpoint_send_connect_ack(btl_endpoint) == OMPI_SUCCESS) {
        btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECT_ACK;
        opal_event_add(&btl_endpoint->endpoint_recv_event, 0);
    } else {
        mca_btl_tcp_endpoint_close(btl_endpoint);
    }
}
/*
 * A file descriptor is available/ready for recv. Check the state
 * of the socket and take the appropriate action. Note the recv lock
 * is always released before invoking upcalls or returning.
 */
static void mca_btl_tcp_endpoint_recv_handler(int sd, short flags, void* user)
{
    mca_btl_base_endpoint_t* btl_endpoint = (mca_btl_base_endpoint_t *)user;
    OPAL_THREAD_LOCK(&btl_endpoint->endpoint_recv_lock);
    switch(btl_endpoint->endpoint_state) {
    case MCA_BTL_TCP_CONNECT_ACK:
        {
            /* the handshake reply is ready; may flip us to CONNECTED */
            mca_btl_tcp_endpoint_recv_connect_ack(btl_endpoint);
            OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
            break;
        }
    case MCA_BTL_TCP_CONNECTED:
        {
            /* resume the partially received frag, or start a new one */
            mca_btl_tcp_frag_t* frag = btl_endpoint->endpoint_recv_frag;
            if(NULL == frag) {
                int rc;
                MCA_BTL_TCP_FRAG_ALLOC_MAX(frag, rc);
                if(NULL == frag) {
                    OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
                    return;
                }
                MCA_BTL_TCP_FRAG_INIT_DST(frag, btl_endpoint);
            }

            /* check for completion of non-blocking recv on the current fragment */
            if(mca_btl_tcp_frag_recv(frag, sd) == false) {
                /* incomplete: park it until the next read event */
                btl_endpoint->endpoint_recv_frag = frag;
                OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
            } else {
                btl_endpoint->endpoint_recv_frag = NULL;
                /* dispatch outside the lock to avoid re-entrancy deadlock */
                OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
                switch(frag->hdr.type) {
                case MCA_BTL_TCP_HDR_TYPE_SEND:
                    {
                        /* deliver to the callback registered for this tag */
                        mca_btl_base_recv_reg_t* reg = frag->btl->tcp_reg + frag->hdr.base.tag;
                        reg->cbfunc(&frag->btl->super, frag->hdr.base.tag, &frag->base, reg->cbdata);
                        break;
                    }
                default:
                    {
                        break;
                    }
                }
                MCA_BTL_TCP_FRAG_RETURN_MAX(frag);
            }
            break;
        }
    case MCA_BTL_TCP_SHUTDOWN:
        {
            OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
            break;
        }
    default:
        {
            OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
            BTL_ERROR(("invalid socket state(%d)", btl_endpoint->endpoint_state));
            mca_btl_tcp_endpoint_close(btl_endpoint);
            break;
        }
    }
}
/*
 * A file descriptor is available/ready for send. Check the state
 * of the socket and take the appropriate action: finish a pending
 * connect, or drain as many queued frags as the socket will take.
 */
static void mca_btl_tcp_endpoint_send_handler(int sd, short flags, void* user)
{
    mca_btl_tcp_endpoint_t* btl_endpoint = (mca_btl_tcp_endpoint_t *)user;
    OPAL_THREAD_LOCK(&btl_endpoint->endpoint_send_lock);
    switch(btl_endpoint->endpoint_state) {
    case MCA_BTL_TCP_CONNECTING:
        mca_btl_tcp_endpoint_complete_connect(btl_endpoint);
        break;
    case MCA_BTL_TCP_CONNECTED:
        {
            /* complete the current send */
            do {
                mca_btl_tcp_frag_t* frag = btl_endpoint->endpoint_send_frag;
                if(mca_btl_tcp_frag_send(frag, btl_endpoint->endpoint_sd) == false) {
                    /* socket full again: keep the frag as current and wait
                     * for the next write event */
                    break;
                }
                /* drop the lock around the completion callback to avoid
                 * re-entrant deadlock, then re-acquire to dequeue */
                OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
                frag->base.des_cbfunc(&frag->btl->super, frag->endpoint, &frag->base, frag->rc);
                OPAL_THREAD_LOCK(&btl_endpoint->endpoint_send_lock);

                /* progress any pending sends */
                btl_endpoint->endpoint_send_frag = (mca_btl_tcp_frag_t*)
                    opal_list_remove_first(&btl_endpoint->endpoint_frags);
            } while (NULL != btl_endpoint->endpoint_send_frag);

            /* if nothing else to do unregister for send event notifications */
            if(NULL == btl_endpoint->endpoint_send_frag) {
                opal_event_del(&btl_endpoint->endpoint_send_event);
            }
            break;
        }
    default:
        BTL_ERROR(("invalid connection state (%d)",
            btl_endpoint->endpoint_state));
        opal_event_del(&btl_endpoint->endpoint_send_event);
        break;
    }
    OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
}

83
ompi/mca/btl/tcp/btl_tcp_endpoint.h Обычный файл
Просмотреть файл

@ -0,0 +1,83 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_TCP_ENDPOINT_H
#define MCA_BTL_TCP_ENDPOINT_H
#include "opal/class/opal_list.h"
#include "opal/event/event.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/btl/btl.h"
#include "btl_tcp_frag.h"
#include "btl_tcp.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
OBJ_CLASS_DECLARATION(mca_btl_tcp_endpoint_t);
/**
 * State of TCP endpoint connection. Transitions are driven by
 * btl_tcp_endpoint.c: CLOSED -> CONNECTING -> CONNECT_ACK ->
 * CONNECTED, with SHUTDOWN/FAILED as terminal states.
 */
typedef enum {
    MCA_BTL_TCP_CLOSED,       /* no socket; a send will trigger a connect */
    MCA_BTL_TCP_CONNECTING,   /* non-blocking connect() in flight */
    MCA_BTL_TCP_CONNECT_ACK,  /* waiting for the peer's identifier */
    MCA_BTL_TCP_CONNECTED,    /* handshake complete; frags may flow */
    MCA_BTL_TCP_SHUTDOWN,     /* permanently closed by the local side */
    MCA_BTL_TCP_FAILED        /* unreachable; sends return an error */
} mca_btl_tcp_state_t;

/**
 * An abstraction that represents a connection to a endpoint process.
 * An instance of mca_btl_base_endpoint_t is associated w/ each process
 * and BTL pair at startup. However, connections to the endpoint
 * are established dynamically on an as-needed basis:
 */
struct mca_btl_base_endpoint_t {
    opal_list_item_t super;
    struct mca_btl_tcp_module_t* endpoint_btl;     /**< BTL instance that created this connection */
    struct mca_btl_tcp_proc_t* endpoint_proc;      /**< proc structure corresponding to endpoint */
    struct mca_btl_tcp_addr_t* endpoint_addr;      /**< address of endpoint */
    int endpoint_sd;                               /**< socket connection to endpoint (-1 when closed) */
    struct mca_btl_tcp_frag_t* endpoint_send_frag; /**< current send frag being processed */
    struct mca_btl_tcp_frag_t* endpoint_recv_frag; /**< current recv frag being processed */
    mca_btl_tcp_state_t endpoint_state;            /**< current state of the connection */
    size_t endpoint_retries;                       /**< number of connection retries attempted */
    opal_list_t endpoint_frags;                    /**< list of pending frags to send */
    opal_mutex_t endpoint_send_lock;               /**< lock for concurrent access to endpoint state */
    opal_mutex_t endpoint_recv_lock;               /**< lock for concurrent access to endpoint state */
    opal_event_t endpoint_send_event;              /**< event for async processing of send frags */
    opal_event_t endpoint_recv_event;              /**< event for async processing of recv frags */
    bool endpoint_nbo;                             /**< convert headers to network byte order? */
};
typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
typedef mca_btl_base_endpoint_t mca_btl_tcp_endpoint_t;

/* apply component socket tuning (Nagle, buffer sizes) to a descriptor */
void mca_btl_tcp_set_socket_options(int sd);
/* close the socket, unhook events, and mark the endpoint CLOSED */
void mca_btl_tcp_endpoint_close(mca_btl_base_endpoint_t*);
/* send or queue a frag; starts a connect when the endpoint is CLOSED */
int  mca_btl_tcp_endpoint_send(mca_btl_base_endpoint_t*, struct mca_btl_tcp_frag_t*);
/* arbitrate an incoming connection against any outbound attempt */
bool mca_btl_tcp_endpoint_accept(mca_btl_base_endpoint_t*, struct sockaddr_in*, int);
/* permanently shut the endpoint down */
void mca_btl_tcp_endpoint_shutdown(mca_btl_base_endpoint_t*);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

170
ompi/mca/btl/tcp/btl_tcp_frag.c Обычный файл
Просмотреть файл

@ -0,0 +1,170 @@
#include "ompi_config.h"
#include "include/ompi_socket_errno.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#include "btl_tcp_frag.h"
#include "btl_tcp_endpoint.h"
/* shared initialization for every frag flavor: no source or
 * destination descriptors attached yet */
static void mca_btl_tcp_frag_common_constructor(mca_btl_tcp_frag_t* frag)
{
    frag->base.des_src_cnt = 0;
    frag->base.des_src = NULL;
    frag->base.des_dst_cnt = 0;
    frag->base.des_dst = NULL;
}

/* eager frags carry up to btl_eager_limit bytes of inline payload */
static void mca_btl_tcp_frag_eager_constructor(mca_btl_tcp_frag_t* frag)
{
    mca_btl_tcp_frag_common_constructor(frag);
    frag->size = mca_btl_tcp_module.super.btl_eager_limit;
}

/* max frags carry up to btl_max_send_size bytes of inline payload */
static void mca_btl_tcp_frag_max_constructor(mca_btl_tcp_frag_t* frag)
{
    mca_btl_tcp_frag_common_constructor(frag);
    frag->size = mca_btl_tcp_module.super.btl_max_send_size;
}

/* user frags reference caller-owned buffers; no inline payload */
static void mca_btl_tcp_frag_user_constructor(mca_btl_tcp_frag_t* frag)
{
    mca_btl_tcp_frag_common_constructor(frag);
    frag->size = 0;
}
/* class registrations: the base frag class registers no constructor
 * (NULL, NULL); each sized flavor installs its own constructor to set
 * frag->size and clear the descriptor fields */
OBJ_CLASS_INSTANCE(
    mca_btl_tcp_frag_t,
    mca_btl_base_descriptor_t,
    NULL,
    NULL);

OBJ_CLASS_INSTANCE(
    mca_btl_tcp_frag_eager_t,
    mca_btl_base_descriptor_t,
    mca_btl_tcp_frag_eager_constructor,
    NULL);

OBJ_CLASS_INSTANCE(
    mca_btl_tcp_frag_max_t,
    mca_btl_base_descriptor_t,
    mca_btl_tcp_frag_max_constructor,
    NULL);

OBJ_CLASS_INSTANCE(
    mca_btl_tcp_frag_user_t,
    mca_btl_base_descriptor_t,
    mca_btl_tcp_frag_user_constructor,
    NULL);
/*
 * Push as much of the frag's iovec as the non-blocking socket will
 * take. Returns true when the whole frag has been written, false when
 * the socket would block (caller re-arms the write event) or on a
 * fatal error (the endpoint is closed first).
 */
bool mca_btl_tcp_frag_send(mca_btl_tcp_frag_t* frag, int sd)
{
    int cnt=-1;
    size_t i, num_vecs;

    /* non-blocking write, but continue if interrupted */
    while(cnt < 0) {
        cnt = writev(sd, frag->iov_ptr, frag->iov_cnt);
        if(cnt < 0) {
            switch(ompi_socket_errno) {
            case EINTR:
                continue;
            case EWOULDBLOCK:
                return false;
            case EFAULT:
                /* FIX: iov_len and iov_cnt are size_t; the original passed
                 * them to a %d conversion, which is undefined behavior on
                 * LP64 platforms -- print via unsigned long instead */
                BTL_ERROR(("writev error (%p, %lu)\n\t%s(%lu)\n",
                    frag->iov_ptr[0].iov_base,
                    (unsigned long)frag->iov_ptr[0].iov_len,
                    strerror(ompi_socket_errno),
                    (unsigned long)frag->iov_cnt));
                /* fall through: a faulting buffer is fatal for this endpoint */
            default:
                {
                    BTL_ERROR(("writev failed with errno=%d", ompi_socket_errno));
                    mca_btl_tcp_endpoint_close(frag->endpoint);
                    return false;
                }
            }
        }
    }

    /* if the write didn't complete - update the iovec state, skipping
     * fully written vectors and trimming the partially written one */
    num_vecs = frag->iov_cnt;
    for(i=0; i<num_vecs; i++) {
        if(cnt >= (int)frag->iov_ptr->iov_len) {
            cnt -= frag->iov_ptr->iov_len;
            frag->iov_ptr++;
            frag->iov_idx++;
            frag->iov_cnt--;
        } else {
            frag->iov_ptr->iov_base = (ompi_iov_base_ptr_t)
                (((unsigned char*)frag->iov_ptr->iov_base) + cnt);
            frag->iov_ptr->iov_len -= cnt;
            break;
        }
    }
    return (frag->iov_cnt == 0);
}
/*
 * Pull as much of the frag's iovec as the non-blocking socket offers.
 * Once the header has arrived, the payload iovec is set up from
 * hdr.size and the read continues. Returns true when the entire frag
 * (header + payload) has been received.
 */
bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t* frag, int sd)
{
    int cnt=-1;
    size_t i, num_vecs;

    /* non-blocking read, but continue if interrupted */
    while(cnt < 0) {
        cnt = readv(sd, frag->iov_ptr, frag->iov_cnt);
        if(cnt < 0) {
            switch(ompi_socket_errno) {
            case EINTR:
                continue;
            case EWOULDBLOCK:
                return false;
            case EFAULT:
                /* FIX: the original messages were copy-pasted from
                 * mca_btl_tcp_frag_send and reported "writev"/"_frag_send"
                 * failures from the read path, and printed size_t values
                 * with %d (undefined behavior on LP64); use BTL_ERROR for
                 * consistency with the rest of the file */
                BTL_ERROR(("readv error (%p, %lu)\n\t%s(%lu)\n",
                    frag->iov_ptr[0].iov_base,
                    (unsigned long)frag->iov_ptr[0].iov_len,
                    strerror(ompi_socket_errno),
                    (unsigned long)frag->iov_cnt));
                /* fall through: a faulting buffer is fatal for this endpoint */
            default:
                {
                    BTL_ERROR(("readv failed with errno=%d", ompi_socket_errno));
                    mca_btl_tcp_endpoint_close(frag->endpoint);
                    return false;
                }
            }
        }
    }

    /* if the read didn't complete - update the iovec state, skipping
     * fully read vectors and trimming the partially read one */
    num_vecs = frag->iov_cnt;
    for(i=0; i<num_vecs; i++) {
        if(cnt >= (int)frag->iov_ptr->iov_len) {
            cnt -= frag->iov_ptr->iov_len;
            frag->iov_idx++;
            frag->iov_ptr++;
            frag->iov_cnt--;
        } else {
            frag->iov_ptr->iov_base = (ompi_iov_base_ptr_t)
                (((unsigned char*)frag->iov_ptr->iov_base) + cnt);
            frag->iov_ptr->iov_len -= cnt;
            break;
        }
    }

    /* header fully received: set up the payload iovec (into the inline
     * buffer just past the frag struct) and keep reading */
    if(frag->iov_cnt == 0 && frag->iov_idx == 1) {
        switch(frag->hdr.type) {
        case MCA_BTL_TCP_HDR_TYPE_SEND:
            frag->iov[1].iov_base = (frag+1);
            frag->iov[1].iov_len = frag->hdr.size;
            frag->segments[0].seg_addr.pval = frag+1;
            frag->segments[0].seg_len = frag->hdr.size;
            frag->iov_cnt++;
            return false;
        default:
            break;
        }
    }
    return (frag->iov_cnt == 0);
}

157
ompi/mca/btl/tcp/btl_tcp_frag.h Обычный файл
Просмотреть файл

@ -0,0 +1,157 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_TCP_FRAG_H
#define MCA_BTL_TCP_FRAG_H
#define MCA_BTL_TCP_FRAG_ALIGN (8)
#include "ompi_config.h"
#include "btl_tcp.h"
#include "btl_tcp_hdr.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_btl_tcp_frag_t);
/**
 * TCP fragment derived type. A frag is a BTL descriptor plus the wire
 * header and the iovec bookkeeping used by the incremental
 * send/recv routines in btl_tcp_frag.c.
 */
struct mca_btl_tcp_frag_t {
    mca_btl_base_descriptor_t base;         /* generic BTL descriptor */
    mca_btl_base_segment_t segments[2];     /* data segments exposed to the PML */
    struct mca_btl_base_endpoint_t *endpoint; /* connection this frag travels on */
    struct mca_btl_tcp_module_t* btl;       /* owning BTL module */
    mca_btl_tcp_hdr_t hdr;                  /* wire header (always iov[0]) */
    struct iovec iov[3];                    /* header + up to two data segments */
    struct iovec *iov_ptr;                  /* first iovec not yet fully transferred */
    size_t iov_cnt;                         /* iovecs remaining to transfer */
    size_t iov_idx;                         /* iovecs already completed */
    size_t size;                            /* inline payload capacity (0 for user frags) */
    int rc;                                 /* completion status passed to des_cbfunc */
};
typedef struct mca_btl_tcp_frag_t mca_btl_tcp_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_tcp_frag_t);

/* the three free-list flavors share the same layout; they differ only
 * in the constructor that sets frag->size */
typedef struct mca_btl_tcp_frag_t mca_btl_tcp_frag_eager_t;

OBJ_CLASS_DECLARATION(mca_btl_tcp_frag_eager_t);

typedef struct mca_btl_tcp_frag_t mca_btl_tcp_frag_max_t;

OBJ_CLASS_DECLARATION(mca_btl_tcp_frag_max_t);

typedef struct mca_btl_tcp_frag_t mca_btl_tcp_frag_user_t;

OBJ_CLASS_DECLARATION(mca_btl_tcp_frag_user_t);
/*
 * Macros to allocate/return descriptors from module specific
 * free list(s). The ALLOC variants block (FREE_LIST_WAIT) until a
 * frag is available; rc receives the free-list status.
 */

#define MCA_BTL_TCP_FRAG_ALLOC_EAGER(frag, rc)                              \
{                                                                           \
                                                                            \
    opal_list_item_t *item;                                                 \
    OMPI_FREE_LIST_WAIT(&mca_btl_tcp_component.tcp_frag_eager, item, rc);   \
    frag = (mca_btl_tcp_frag_t*) item;                                      \
}

#define MCA_BTL_TCP_FRAG_RETURN_EAGER(frag)                                 \
{                                                                           \
    OMPI_FREE_LIST_RETURN(&mca_btl_tcp_component.tcp_frag_eager,            \
        (opal_list_item_t*)(frag));                                         \
}

#define MCA_BTL_TCP_FRAG_ALLOC_MAX(frag, rc)                                \
{                                                                           \
                                                                            \
    opal_list_item_t *item;                                                 \
    OMPI_FREE_LIST_WAIT(&mca_btl_tcp_component.tcp_frag_max, item, rc);     \
    frag = (mca_btl_tcp_frag_t*) item;                                      \
}

#define MCA_BTL_TCP_FRAG_RETURN_MAX(frag)                                   \
{                                                                           \
    OMPI_FREE_LIST_RETURN(&mca_btl_tcp_component.tcp_frag_max,              \
        (opal_list_item_t*)(frag));                                         \
}

#define MCA_BTL_TCP_FRAG_ALLOC_USER(frag, rc)                               \
{                                                                           \
    opal_list_item_t *item;                                                 \
    OMPI_FREE_LIST_WAIT(&mca_btl_tcp_component.tcp_frag_user, item, rc);    \
    frag = (mca_btl_tcp_frag_t*) item;                                      \
}

#define MCA_BTL_TCP_FRAG_RETURN_USER(frag)                                  \
{                                                                           \
    OMPI_FREE_LIST_RETURN(&mca_btl_tcp_component.tcp_frag_user,             \
        (opal_list_item_t*)(frag));                                         \
}
#define MCA_BTL_TCP_FRAG_INIT_SRC(frag,endpoint) \
do { \
size_t i; \
frag->rc = 0; \
frag->hdr.size = 0; \
frag->iov_idx = 0; \
frag->endpoint = endpoint; \
frag->hdr.size = 0; \
frag->iov_cnt = 1; \
frag->iov_ptr = frag->iov; \
frag->iov[0].iov_base = &frag->hdr; \
frag->iov[0].iov_len = sizeof(frag->hdr); \
for(i=0; i<frag->base.des_src_cnt; i++) { \
frag->hdr.size += frag->segments[i].seg_len; \
frag->iov[i+1].iov_len = frag->segments[i].seg_len; \
frag->iov[i+1].iov_base = frag->segments[i].seg_addr.pval; \
frag->iov_cnt++; \
} \
} while(0)
#define MCA_BTL_TCP_FRAG_INIT_DST(frag,ep) \
do { \
frag->rc = 0; \
frag->btl = ep->endpoint_btl; \
frag->endpoint = ep; \
frag->iov[0].iov_len = sizeof(frag->hdr); \
frag->iov[0].iov_base = &frag->hdr; \
frag->iov_cnt = 1; \
frag->iov_idx = 0; \
frag->iov_ptr = frag->iov; \
frag->base.des_src = NULL; \
frag->base.des_dst_cnt = 0; \
frag->base.des_dst = frag->segments; \
frag->base.des_dst_cnt = 1; \
} while(0)
bool mca_btl_tcp_frag_send(mca_btl_tcp_frag_t*, int sd);
bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t*, int sd);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

48
ompi/mca/btl/tcp/btl_tcp_hdr.h Обычный файл
Просмотреть файл

@ -0,0 +1,48 @@
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University.
 * All rights reserved.
 * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
 * All rights reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 * University of Stuttgart. All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 * All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
#ifndef MCA_BTL_TCP_HDR_H
#define MCA_BTL_TCP_HDR_H

#include "ompi_config.h"
#include "mca/btl/base/base.h"
#include "btl_tcp.h"

#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif

/**
 * TCP header.  Sent on the wire ahead of every fragment's payload.
 */

/* values carried in mca_btl_tcp_hdr_t.type */
#define MCA_BTL_TCP_HDR_TYPE_SEND 1
#define MCA_BTL_TCP_HDR_TYPE_PUT 2
#define MCA_BTL_TCP_HDR_TYPE_GET 3

struct mca_btl_tcp_hdr_t {
    mca_btl_base_header_t base; /**< generic BTL base header */
    uint8_t type;               /**< one of MCA_BTL_TCP_HDR_TYPE_* */
    uint64_t size;              /**< payload length (bytes) following this header */
};
typedef struct mca_btl_tcp_hdr_t mca_btl_tcp_hdr_t;

#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

233
ompi/mca/btl/tcp/btl_tcp_proc.c Обычный файл
Просмотреть файл

@ -0,0 +1,233 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "orte/class/orte_proc_table.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#include "ompi/mca/pml/base/pml_base_module_exchange.h"
#include "btl_tcp.h"
#include "btl_tcp_proc.h"
/* forward declarations for the class constructor/destructor */
static void mca_btl_tcp_proc_construct(mca_btl_tcp_proc_t* proc);
static void mca_btl_tcp_proc_destruct(mca_btl_tcp_proc_t* proc);

/* instantiate the OPAL class: mca_btl_tcp_proc_t derives from
 * opal_list_item_t so instances can be placed on opal lists */
OBJ_CLASS_INSTANCE(
    mca_btl_tcp_proc_t,
    opal_list_item_t,
    mca_btl_tcp_proc_construct,
    mca_btl_tcp_proc_destruct);
/*
 * Initialize a freshly allocated TCP proc instance: no cached ompi
 * proc, no published addresses, no endpoints, and a usable lock.
 */
void mca_btl_tcp_proc_construct(mca_btl_tcp_proc_t* proc)
{
    proc->proc_ompi = NULL;
    proc->proc_addrs = NULL;
    proc->proc_addr_count = 0;
    proc->proc_endpoints = NULL;
    proc->proc_endpoint_count = 0;
    OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t);
}
/*
 * Cleanup a TCP proc instance: remove it from the component's global
 * hash table of procs and release its resources.
 */
void mca_btl_tcp_proc_destruct(mca_btl_tcp_proc_t* proc)
{
    /* remove from list of all proc instances */
    OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
    orte_hash_table_remove_proc(&mca_btl_tcp_component.tcp_procs, &proc->proc_name);
    OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);

    /* release resources */
    if(NULL != proc->proc_endpoints) {
        free(proc->proc_endpoints);
        proc->proc_endpoints = NULL;
    }
    /* the lock is constructed unconditionally in the constructor, so it
     * must be destructed unconditionally here (the original destructed
     * it only when proc_endpoints was non-NULL, leaking the mutex for
     * procs that never allocated an endpoint array) */
    OBJ_DESTRUCT(&proc->proc_lock);
}
/*
 * Create a TCP process structure. There is a one-to-one correspondence
 * between a ompi_proc_t and a mca_btl_tcp_proc_t instance. We cache
 * additional data (specifically the list of mca_btl_tcp_endpoint_t instances,
 * and published addresses) associated w/ a given destination on this
 * datastructure.
 *
 * Returns the (possibly pre-existing) proc instance, or NULL on any
 * failure (allocation, modex lookup, malformed address blob).
 */
mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(ompi_proc_t* ompi_proc)
{
    int rc;
    size_t size;
    mca_btl_tcp_proc_t* btl_proc;

    /* return the cached instance if one was already created */
    OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
    btl_proc = (mca_btl_tcp_proc_t*)orte_hash_table_get_proc(
        &mca_btl_tcp_component.tcp_procs, &ompi_proc->proc_name);
    if(NULL != btl_proc) {
        OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
        return btl_proc;
    }

    btl_proc = OBJ_NEW(mca_btl_tcp_proc_t);
    if(NULL == btl_proc) {
        /* the original returned here while still holding tcp_lock,
         * deadlocking every later caller */
        OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
        return NULL;
    }
    btl_proc->proc_ompi = ompi_proc;
    btl_proc->proc_name = ompi_proc->proc_name;

    /* add to hash table of all proc instance */
    orte_hash_table_set_proc(
        &mca_btl_tcp_component.tcp_procs,
        &btl_proc->proc_name,
        btl_proc);
    OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);

    /* lookup tcp parameters exported by this proc */
    rc = mca_base_modex_recv( &mca_btl_tcp_component.super.btl_version,
        ompi_proc,
        (void**)&btl_proc->proc_addrs,
        &size);
    if(rc != OMPI_SUCCESS) {
        BTL_ERROR(("mca_base_modex_recv: failed with return value=%d", rc));
        OBJ_RELEASE(btl_proc);
        return NULL;
    }
    /* the received blob must be a whole number of address records;
     * size is a size_t, so don't print it with %d */
    if(0 != (size % sizeof(mca_btl_tcp_addr_t))) {
        BTL_ERROR(("mca_base_modex_recv: invalid size %lu\n", (unsigned long)size));
        OBJ_RELEASE(btl_proc);  /* original leaked btl_proc on this path */
        return NULL;
    }
    btl_proc->proc_addr_count = size / sizeof(mca_btl_tcp_addr_t);

    /* allocate space for endpoint array - one for each exported address */
    btl_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
        malloc(btl_proc->proc_addr_count * sizeof(mca_btl_base_endpoint_t*));
    if(NULL == btl_proc->proc_endpoints) {
        OBJ_RELEASE(btl_proc);
        return NULL;
    }

    /* remember the proc describing ourselves */
    if(NULL == mca_btl_tcp_component.tcp_local && ompi_proc == ompi_proc_local())
        mca_btl_tcp_component.tcp_local = btl_proc;
    return btl_proc;
}
/*
 * Note that this routine must be called with the lock on the process
 * already held.  Insert a btl instance into the proc array and assign
 * it an address.
 *
 * Returns OMPI_SUCCESS when an address was assigned, OMPI_ERR_UNREACH
 * when the peer exports no usable address.
 */
int mca_btl_tcp_proc_insert(
    mca_btl_tcp_proc_t* btl_proc,
    mca_btl_base_endpoint_t* btl_endpoint)
{
    struct mca_btl_tcp_module_t *btl_tcp = btl_endpoint->endpoint_btl;
    size_t i;
    unsigned long net1;

    /* insert into endpoint array */
    btl_endpoint->endpoint_proc = btl_proc;
    btl_proc->proc_endpoints[btl_proc->proc_endpoint_count++] = btl_endpoint;

    /* network number of the local interface */
    net1 = btl_tcp->tcp_ifaddr.sin_addr.s_addr & btl_tcp->tcp_ifmask.sin_addr.s_addr;

    /*
     * Look through the proc instance for an address that is on the
     * directly attached network. If we don't find one, pick the first
     * unused address.
     */
    for(i=0; i<btl_proc->proc_addr_count; i++) {
        mca_btl_tcp_addr_t* endpoint_addr = btl_proc->proc_addrs + i;
        unsigned long net2 = endpoint_addr->addr_inet.s_addr & btl_tcp->tcp_ifmask.sin_addr.s_addr;
        if(endpoint_addr->addr_inuse != 0)
            continue;
        if(net1 == net2) {
            /* same subnet - take it and stop looking */
            btl_endpoint->endpoint_addr = endpoint_addr;
            break;
        } else if(NULL == btl_endpoint->endpoint_addr) {
            /* remember the first unused address as a fallback.  The
             * original tested '!= 0' here, which could never select a
             * fallback (endpoint_addr starts out unset - consistent
             * with the NULL check below), contradicting the comment
             * above. */
            btl_endpoint->endpoint_addr = endpoint_addr;
        }
    }

    /* Make sure there is a common interface */
    if( NULL != btl_endpoint->endpoint_addr ) {
        btl_endpoint->endpoint_addr->addr_inuse++;
        return OMPI_SUCCESS;
    }
    return OMPI_ERR_UNREACH;
}
/*
 * Remove an endpoint from the proc array and indicate the address is
 * no longer in use.  When the last endpoint is removed, the reference
 * this proc holds is dropped, which may trigger its destructor.
 */
int mca_btl_tcp_proc_remove(mca_btl_tcp_proc_t* btl_proc, mca_btl_base_endpoint_t* btl_endpoint)
{
    size_t i;
    OPAL_THREAD_LOCK(&btl_proc->proc_lock);
    for(i=0; i<btl_proc->proc_endpoint_count; i++) {
        if(btl_proc->proc_endpoints[i] == btl_endpoint) {
            /* close the gap left by the removed endpoint */
            memmove(btl_proc->proc_endpoints+i, btl_proc->proc_endpoints+i+1,
                (btl_proc->proc_endpoint_count-i-1)*sizeof(mca_btl_base_endpoint_t*));
            if(--btl_proc->proc_endpoint_count == 0) {
                /* last endpoint gone: unlock before OBJ_RELEASE since the
                 * destructor may free the proc (and its lock).
                 * NOTE(review): on this path addr_inuse is NOT decremented
                 * for the removed endpoint - confirm whether that is
                 * intentional (the whole proc goes away) or an oversight. */
                OPAL_THREAD_UNLOCK(&btl_proc->proc_lock);
                OBJ_RELEASE(btl_proc);
                return OMPI_SUCCESS;
            }
            btl_endpoint->endpoint_addr->addr_inuse--;
            break;
        }
    }
    OPAL_THREAD_UNLOCK(&btl_proc->proc_lock);
    return OMPI_SUCCESS;
}
/*
 * Look up an existing TCP process instance based on the globally
 * unique process identifier.  Returns NULL when no instance has been
 * created for that name yet.
 */
mca_btl_tcp_proc_t* mca_btl_tcp_proc_lookup(const orte_process_name_t *name)
{
    mca_btl_tcp_proc_t* found;

    OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
    found = (mca_btl_tcp_proc_t*)orte_hash_table_get_proc(
        &mca_btl_tcp_component.tcp_procs, name);
    OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
    return found;
}
/*
 * Offer an accepted connection (socket sd, peer address addr) to each
 * endpoint associated with this proc until one claims it.  Returns
 * false when no endpoint accepts the socket.
 */
bool mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t* btl_proc, struct sockaddr_in* addr, int sd)
{
    bool claimed = false;
    size_t idx;

    OPAL_THREAD_LOCK(&btl_proc->proc_lock);
    for(idx = 0; !claimed && idx < btl_proc->proc_endpoint_count; idx++) {
        claimed = mca_btl_tcp_endpoint_accept(
            btl_proc->proc_endpoints[idx], addr, sd);
    }
    OPAL_THREAD_UNLOCK(&btl_proc->proc_lock);
    return claimed;
}

85
ompi/mca/btl/tcp/btl_tcp_proc.h Обычный файл
Просмотреть файл

@ -0,0 +1,85 @@
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University.
 * All rights reserved.
 * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
 * All rights reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 * University of Stuttgart. All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 * All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
#ifndef MCA_BTL_TCP_PROC_H
#define MCA_BTL_TCP_PROC_H

#include "opal/class/opal_object.h"
#include "orte/mca/ns/ns.h"
#include "ompi/proc/proc.h"
#include "btl_tcp.h"
#include "btl_tcp_addr.h"
#include "btl_tcp_endpoint.h"

#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif

OBJ_CLASS_DECLARATION(mca_btl_tcp_proc_t);

/**
 * Represents the state of a remote process and the set of addresses
 * that it exports. Also cache an instance of mca_btl_base_endpoint_t for
 * each
 * BTL instance that attempts to open a connection to the process.
 */
struct mca_btl_tcp_proc_t {
    opal_list_item_t super;
    /**< allow proc to be placed on a list */
    ompi_proc_t *proc_ompi;
    /**< pointer to corresponding ompi_proc_t */
    orte_process_name_t proc_name;
    /**< globally unique identifier for the process */
    struct mca_btl_tcp_addr_t* proc_addrs;
    /**< array of addresses exported by peer */
    size_t proc_addr_count;
    /**< number of addresses published by endpoint */
    struct mca_btl_base_endpoint_t **proc_endpoints;
    /**< array of endpoints that have been created to access this proc */
    size_t proc_endpoint_count;
    /**< number of endpoints */
    opal_mutex_t proc_lock;
    /**< lock to protect against concurrent access to proc state */
};
typedef struct mca_btl_tcp_proc_t mca_btl_tcp_proc_t;

/** Create (or return the cached) proc instance for an ompi_proc_t. */
mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(ompi_proc_t* ompi_proc);
/** Look up an existing proc instance by process name; NULL if absent. */
mca_btl_tcp_proc_t* mca_btl_tcp_proc_lookup(const orte_process_name_t* name);
/** Insert an endpoint and assign it a peer address (caller holds proc_lock). */
int mca_btl_tcp_proc_insert(mca_btl_tcp_proc_t*, mca_btl_base_endpoint_t*);
/** Remove an endpoint and release its address. */
int mca_btl_tcp_proc_remove(mca_btl_tcp_proc_t*, mca_btl_base_endpoint_t*);
/** Offer an accepted socket to this proc's endpoints. */
bool mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t*, struct sockaddr_in*, int);

/**
 * Inlined function to return local TCP proc instance.
 * Lazily creates it on first use.
 */
static inline mca_btl_tcp_proc_t* mca_btl_tcp_proc_local(void)
{
    if(NULL == mca_btl_tcp_component.tcp_local)
        mca_btl_tcp_component.tcp_local = mca_btl_tcp_proc_create(ompi_proc_local());
    return mca_btl_tcp_component.tcp_local;
}

#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

22
ompi/mca/btl/tcp/configure.params Обычный файл
Просмотреть файл

@ -0,0 +1,22 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

# Specific to this module

# Source file the build system probes to initialize this component
PARAM_INIT_FILE=btl_tcp.c
# Generated per-component config header
PARAM_CONFIG_HEADER_FILE="tcp_config.h"
# Files produced by configure for this component
PARAM_CONFIG_FILES="Makefile"