2005-08-02 17:20:50 +04:00
|
|
|
#include "ompi_config.h"
|
2005-08-02 18:59:50 +04:00
|
|
|
|
|
|
|
#ifdef HAVE_SYS_TYPES_H
|
|
|
|
#include <sys/types.h>
|
|
|
|
#endif
|
|
|
|
#ifdef HAVE_SYS_UIO_H
|
|
|
|
#include <sys/uio.h>
|
|
|
|
#endif
|
2005-12-11 01:04:28 +03:00
|
|
|
#ifdef HAVE_UNISTD_H
|
2005-08-02 18:59:50 +04:00
|
|
|
#include <unistd.h>
|
2005-12-11 01:04:28 +03:00
|
|
|
#endif /* HAVE_UNISTD_H */
|
2005-08-02 18:59:50 +04:00
|
|
|
|
2006-02-12 04:33:29 +03:00
|
|
|
#include "orte/orte_socket_errno.h"
|
2005-08-02 17:20:50 +04:00
|
|
|
#include "ompi/mca/btl/base/btl_base_error.h"
|
|
|
|
#include "btl_tcp_frag.h"
|
|
|
|
#include "btl_tcp_endpoint.h"
|
2005-09-13 00:22:59 +04:00
|
|
|
#include "orte/util/proc_info.h"
|
2005-08-02 17:20:50 +04:00
|
|
|
|
|
|
|
/**
 * Shared initialisation used by every TCP fragment constructor in this file.
 *
 * Clears the source and destination descriptor fields of the embedded base
 * descriptor: both pointers become NULL and both counts become 0.
 *
 * @param frag  fragment being constructed
 */
static void mca_btl_tcp_frag_common_constructor(mca_btl_tcp_frag_t* frag)
{
    /* no segments attached yet, in either direction */
    frag->base.des_src_cnt = 0;
    frag->base.des_dst_cnt = 0;

    frag->base.des_src = NULL;
    frag->base.des_dst = NULL;
}
|
|
|
|
|
|
|
|
/**
 * Constructor for eager fragments.
 *
 * Resets the base descriptor fields and records the TCP module's eager
 * limit as the fragment size.
 *
 * @param frag  fragment being constructed
 */
static void mca_btl_tcp_frag_eager_constructor(mca_btl_tcp_frag_t* frag)
{
    mca_btl_tcp_frag_common_constructor(frag);
    frag->size = mca_btl_tcp_module.super.btl_eager_limit;
}
|
|
|
|
|
|
|
|
/**
 * Constructor for maximum-size fragments.
 *
 * Resets the base descriptor fields and records the TCP module's maximum
 * send size as the fragment size.
 *
 * @param frag  fragment being constructed
 */
static void mca_btl_tcp_frag_max_constructor(mca_btl_tcp_frag_t* frag)
{
    mca_btl_tcp_frag_common_constructor(frag);
    frag->size = mca_btl_tcp_module.super.btl_max_send_size;
}
|
|
|
|
|
|
|
|
/**
 * Constructor for user fragments.
 *
 * Resets the base descriptor fields and sets the fragment size to 0.
 *
 * @param frag  fragment being constructed
 */
static void mca_btl_tcp_frag_user_constructor(mca_btl_tcp_frag_t* frag)
{
    mca_btl_tcp_frag_common_constructor(frag);
    frag->size = 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
OBJ_CLASS_INSTANCE(
|
|
|
|
mca_btl_tcp_frag_t,
|
|
|
|
mca_btl_base_descriptor_t,
|
|
|
|
NULL,
|
|
|
|
NULL);
|
|
|
|
|
|
|
|
OBJ_CLASS_INSTANCE(
|
|
|
|
mca_btl_tcp_frag_eager_t,
|
|
|
|
mca_btl_base_descriptor_t,
|
|
|
|
mca_btl_tcp_frag_eager_constructor,
|
|
|
|
NULL);
|
|
|
|
|
|
|
|
OBJ_CLASS_INSTANCE(
|
|
|
|
mca_btl_tcp_frag_max_t,
|
|
|
|
mca_btl_base_descriptor_t,
|
|
|
|
mca_btl_tcp_frag_max_constructor,
|
|
|
|
NULL);
|
|
|
|
|
|
|
|
OBJ_CLASS_INSTANCE(
|
|
|
|
mca_btl_tcp_frag_user_t,
|
|
|
|
mca_btl_base_descriptor_t,
|
|
|
|
mca_btl_tcp_frag_user_constructor,
|
|
|
|
NULL);
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Try to write the fragment's pending iovecs to a socket.
 *
 * Issues a single writev() (retried only when interrupted by EINTR) and then
 * advances the fragment's iovec cursor (iov_ptr / iov_idx / iov_cnt) past
 * whatever was written, trimming a partially written iovec in place.
 *
 * @param frag  fragment to send; its iov_ptr, iov_cnt and iov_idx are updated
 * @param sd    socket descriptor handed to writev()
 *
 * @return true  when no iovec remains to be written (iov_cnt reached 0);
 *         false when data is still pending, when the socket reported
 *               EWOULDBLOCK, or when writev() failed with any other error
 *               (in which case the fragment's endpoint has been closed).
 */
bool mca_btl_tcp_frag_send(mca_btl_tcp_frag_t* frag, int sd)
{
    int cnt=-1;
    size_t i, num_vecs;

    /* non-blocking write, but continue if interrupted */
    while(cnt < 0) {
        cnt = writev(sd, frag->iov_ptr, frag->iov_cnt);
        if(cnt < 0) {
            /* capture the error code before any other call can overwrite it */
            const int err = ompi_socket_errno;

            switch(err) {
            case EINTR:
                continue;
            case EWOULDBLOCK:
                /* opal_output(0, "mca_btl_tcp_frag_send: EWOULDBLOCK\n"); */
                return false;
            case EFAULT:
                /* iov_len / iov_cnt are not int: cast explicitly so the
                 * arguments match the conversion specifiers */
                BTL_ERROR(("writev error (%p, %lu)\n\t%s(%lu)\n",
                           (void*)frag->iov_ptr[0].iov_base,
                           (unsigned long)frag->iov_ptr[0].iov_len,
                           strerror(err),
                           (unsigned long)frag->iov_cnt));
                /* fall through: EFAULT is also fatal for the endpoint */
            default:
                {
                BTL_ERROR(("writev failed with errno=%d", err));
                mca_btl_tcp_endpoint_close(frag->endpoint);
                return false;
                }
            }
        }
    }

    /* if the write didn't complete - update the iovec state */
    num_vecs = frag->iov_cnt;
    for(i=0; i<num_vecs; i++) {
        if(cnt >= (int)frag->iov_ptr->iov_len) {
            /* this iovec went out completely: step to the next one */
            cnt -= frag->iov_ptr->iov_len;
            frag->iov_ptr++;
            frag->iov_idx++;
            frag->iov_cnt--;
        } else {
            /* partially written: keep only the unsent tail of this iovec */
            frag->iov_ptr->iov_base = (ompi_iov_base_ptr_t)
                (((unsigned char*)frag->iov_ptr->iov_base) + cnt);
            frag->iov_ptr->iov_len -= cnt;
            break;
        }
    }
    return (frag->iov_cnt == 0);
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Try to read the next part of an incoming fragment from a socket.
 *
 * The fragment is received in stages driven by its iovec cursor
 * (iov_ptr / iov_idx / iov_cnt). Whenever the current set of iovecs has been
 * filled, the header type decides whether more iovecs must be posted
 * (payload for SEND, segment list and then segment data for PUT); in that
 * case the function loops back and reads again.
 *
 * When MCA_BTL_TCP_ENDPOINT_CACHE is enabled, data already sitting in the
 * endpoint cache is consumed before touching the socket, and each readv()
 * posts the cache buffer as one extra trailing iovec so that bytes beyond
 * the current fragment stage are kept for the next call.
 *
 * @param frag  fragment being received; its iovec state is updated
 * @param sd    socket descriptor handed to readv()
 *
 * @return true  when the current iovecs are filled and the header type needs
 *               no further stage;
 *         false when data is still missing, when the socket reported
 *               EWOULDBLOCK, when readv() returned 0, or when readv() failed
 *               with any other error (the endpoint is closed in the last
 *               two cases).
 */
bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t* frag, int sd)
{
    int cnt;
    size_t i, num_vecs;
    mca_btl_base_endpoint_t* btl_endpoint = frag->endpoint;

 repeat:
    num_vecs = frag->iov_cnt;
#if MCA_BTL_TCP_ENDPOINT_CACHE
    if( 0 != btl_endpoint->endpoint_cache_length ) {
        size_t length = btl_endpoint->endpoint_cache_length;
        /* It looks strange at first, but cnt has to be set to the full amount
         * of data available. After jumping to advance_iov_position, cnt is
         * used to detect whether some data is still pending in the cache.
         */
        cnt = btl_endpoint->endpoint_cache_length;
        for( i = 0; i < frag->iov_cnt; i++ ) {
            /* never copy more than the iovec being filled can hold */
            if( length > frag->iov_ptr[i].iov_len )
                length = frag->iov_ptr[i].iov_len;
            memcpy( frag->iov_ptr[i].iov_base, btl_endpoint->endpoint_cache_pos, length );
            btl_endpoint->endpoint_cache_pos += length;
            btl_endpoint->endpoint_cache_length -= length;
            length = btl_endpoint->endpoint_cache_length;
            if( 0 == length ) {
                /* cache drained: rewind the read position to its start */
                btl_endpoint->endpoint_cache_pos = btl_endpoint->endpoint_cache;
                break;
            }
        }
        goto advance_iov_position;
    }
    /* What happens if all iovecs are used by the fragment? It still works, as
     * one iovec is reserved for the caching in the fragment structure
     * (the +1).
     */
    frag->iov_ptr[num_vecs].iov_base = btl_endpoint->endpoint_cache;
    frag->iov_ptr[num_vecs].iov_len  = mca_btl_tcp_component.tcp_endpoint_cache;
    num_vecs++;
#endif  /* MCA_BTL_TCP_ENDPOINT_CACHE */

    /* non-blocking read, but continue if interrupted */
    cnt = -1;
    while( cnt < 0 ) {
        cnt = readv(sd, frag->iov_ptr, num_vecs);
        if(cnt < 0) {
            /* capture the error code before any other call can overwrite it */
            const int err = ompi_socket_errno;

            switch(err) {
            case EINTR:
                continue;
            case EWOULDBLOCK:
                return false;
            case EFAULT:
                /* iov_len / iov_cnt are not int: cast explicitly so the
                 * arguments match the conversion specifiers */
                opal_output( 0, "mca_btl_tcp_frag_recv: readv error (%p, %lu)\n\t%s(%lu)\n",
                             (void*)frag->iov_ptr[0].iov_base,
                             (unsigned long)frag->iov_ptr[0].iov_len,
                             strerror(err),
                             (unsigned long)frag->iov_cnt );
                /* fall through: EFAULT is also fatal for the endpoint */
            default:
                opal_output(0, "mca_btl_tcp_frag_recv: readv failed with errno=%d",
                            err);
                mca_btl_tcp_endpoint_close(btl_endpoint);
                return false;
            }
        }
        if( cnt == 0 ) {
            /* readv() returned 0: close the endpoint and report not-complete */
            mca_btl_tcp_endpoint_close(btl_endpoint);
            return false;
        }
        /* explicit jump keeps the label referenced even when the endpoint
         * cache is compiled out */
        goto advance_iov_position;
    }

 advance_iov_position:
    /* if the read didn't complete - update the iovec state */
    num_vecs = frag->iov_cnt;
    for( i = 0; i < num_vecs; i++ ) {
        if( cnt >= (int)frag->iov_ptr->iov_len ) {
            /* this iovec is full: step to the next one */
            cnt -= frag->iov_ptr->iov_len;
            frag->iov_idx++;
            frag->iov_ptr++;
            frag->iov_cnt--;
        } else {
            /* partially filled: keep only the still-empty tail */
            frag->iov_ptr->iov_base = (ompi_iov_base_ptr_t)
                (((unsigned char*)frag->iov_ptr->iov_base) + cnt);
            frag->iov_ptr->iov_len -= cnt;
            cnt = 0;
            break;
        }
    }

#if MCA_BTL_TCP_ENDPOINT_CACHE
    /* whatever was not consumed by the fragment's iovecs stays in the cache */
    btl_endpoint->endpoint_cache_length = cnt;
#endif  /* MCA_BTL_TCP_ENDPOINT_CACHE */

    /* read header */
    if(frag->iov_cnt == 0) {
        switch(frag->hdr.type) {
        case MCA_BTL_TCP_HDR_TYPE_SEND:
            if(frag->iov_idx == 1 && frag->hdr.size) {
                /* header done: post the payload, stored right behind the
                 * fragment structure */
                frag->iov[1].iov_base = (void*)(frag+1);
                frag->iov[1].iov_len = frag->hdr.size;
                frag->segments[0].seg_addr.pval = frag+1;
                frag->segments[0].seg_len = frag->hdr.size;
                frag->iov_cnt++;
                goto repeat;
            }
            break;
        case MCA_BTL_TCP_HDR_TYPE_PUT:
            if(frag->iov_idx == 1) {
                /* header done: read the segment descriptors next */
                frag->iov[1].iov_base = (void*)frag->segments;
                frag->iov[1].iov_len = frag->hdr.count * sizeof(mca_btl_base_segment_t);
                frag->iov_cnt++;
                goto repeat;
            } else if (frag->iov_idx == 2) {
                /* descriptors done: post one iovec per described segment.
                 * NOTE(review): hdr.count is not bounds-checked against the
                 * capacity of frag->iov / frag->segments here -- confirm it
                 * is validated elsewhere. */
                for(i=0; i<frag->hdr.count; i++) {
                    frag->iov[i+2].iov_base = frag->segments[i].seg_addr.pval;
                    frag->iov[i+2].iov_len = frag->segments[i].seg_len;
                    frag->iov_cnt++;
                }
                goto repeat;
            }
            break;
        case MCA_BTL_TCP_HDR_TYPE_GET:
        default:
            break;
        }
        return true;
    }
    return false;
}
|
|
|
|
|