2004-01-21 03:05:46 +03:00
|
|
|
/*
|
2007-03-17 02:11:45 +03:00
|
|
|
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
2005-11-05 22:57:48 +03:00
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
2004-11-28 23:09:25 +03:00
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
2005-03-24 15:43:37 +03:00
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
2012-04-10 02:14:14 +04:00
|
|
|
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
|
|
|
* All rights reserved.
|
2012-06-27 01:42:16 +04:00
|
|
|
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
2004-11-22 04:38:40 +03:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
2004-10-15 23:31:47 +04:00
|
|
|
* $HEADER$
|
2004-01-21 03:05:46 +03:00
|
|
|
*/
|
2004-06-07 19:33:53 +04:00
|
|
|
#include "ompi_config.h"
|
2004-01-21 03:05:46 +03:00
|
|
|
#include <stdio.h>
|
2004-08-08 23:20:19 +04:00
|
|
|
#include <string.h>
|
2004-10-20 05:03:09 +04:00
|
|
|
#ifdef HAVE_UNISTD_H
|
2004-08-08 23:20:19 +04:00
|
|
|
#include <unistd.h>
|
2004-10-20 05:03:09 +04:00
|
|
|
#endif
|
|
|
|
#ifdef HAVE_SYS_TYPES_H
|
2004-08-08 23:20:19 +04:00
|
|
|
#include <sys/types.h>
|
2004-10-20 05:03:09 +04:00
|
|
|
#endif
|
|
|
|
#ifdef HAVE_SYS_SOCKET_H
|
2004-08-08 23:20:19 +04:00
|
|
|
#include <sys/socket.h>
|
2004-10-20 05:03:09 +04:00
|
|
|
#endif
|
2004-08-08 23:20:19 +04:00
|
|
|
#include <errno.h>
|
2004-10-20 05:03:09 +04:00
|
|
|
#ifdef HAVE_NETINET_IN_H
|
2004-08-08 23:20:19 +04:00
|
|
|
#include <netinet/in.h>
|
2004-10-20 05:03:09 +04:00
|
|
|
#endif
|
2004-01-21 03:05:46 +03:00
|
|
|
|
2012-06-27 01:42:16 +04:00
|
|
|
#include "orca/include/rte_orca.h"
|
2012-04-10 02:14:14 +04:00
|
|
|
|
2006-02-12 04:33:29 +03:00
|
|
|
#include "ompi/mpi/c/bindings.h"
|
2009-04-29 05:32:14 +04:00
|
|
|
#include "ompi/runtime/params.h"
|
|
|
|
#include "ompi/communicator/communicator.h"
|
|
|
|
#include "ompi/errhandler/errhandler.h"
|
2008-02-28 04:57:57 +03:00
|
|
|
#include "ompi/mca/dpm/dpm.h"
|
|
|
|
|
2004-01-21 03:05:46 +03:00
|
|
|
|
2009-05-07 00:11:28 +04:00
|
|
|
#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES
|
2004-01-21 03:05:46 +03:00
|
|
|
#pragma weak MPI_Comm_join = PMPI_Comm_join
|
|
|
|
#endif
|
|
|
|
|
2004-06-07 19:33:53 +04:00
|
|
|
#if OMPI_PROFILING_DEFINES
|
2006-02-12 04:33:29 +03:00
|
|
|
#include "ompi/mpi/c/profile/defines.h"
|
2004-04-20 22:50:43 +04:00
|
|
|
#endif
|
|
|
|
|
2004-07-30 06:58:53 +04:00
|
|
|
/* Name of this MPI entry point, passed to the error-handling macros below. */
static const char FUNC_NAME[] = "MPI_Comm_join";
|
2004-06-23 00:21:35 +04:00
|
|
|
|
2004-08-08 23:20:19 +04:00
|
|
|
/* Write len bytes from buf to the descriptor fd, retrying when write() is
 * interrupted (EINTR).  Returns OMPI_SUCCESS on success and an MPI error
 * code otherwise. */
static int ompi_socket_send (int fd, char *buf, int len );
|
|
|
|
/* Read len bytes from the descriptor fd into buf, retrying when read() is
 * interrupted (EINTR).  Returns MPI_SUCCESS on success and an MPI error
 * code otherwise. */
static int ompi_socket_recv (int fd, char *buf, int len );
|
2004-06-23 00:21:35 +04:00
|
|
|
|
2004-06-16 05:41:01 +04:00
|
|
|
/*
 * MPI_Comm_join - create an intercommunicator with the process at the
 * other end of the connection identified by fd.
 *
 * The two sides exchange, over fd, first their process names and then the
 * names of the ports they opened; each string is sent as a 32-bit length in
 * network byte order followed by the NUL-terminated text.  The process
 * names decide which side passes send_first = true to connect_accept.
 *
 * fd         descriptor that is read from and written to with
 *            ompi_socket_recv() / ompi_socket_send().
 * intercomm  output; receives the communicator produced by connect_accept,
 *            or MPI_COMM_NULL when an internal step fails.
 *
 * Returns the code from ompi_dpm.open_port if opening the port fails,
 * MPI_ERR_INTERN if the exchange over fd fails, the peer sends a malformed
 * string, memory cannot be allocated, or the peer is this very process;
 * otherwise whatever OMPI_ERRHANDLER_RETURN yields for the connect_accept
 * result.
 */
int MPI_Comm_join(int fd, MPI_Comm *intercomm)
{
    int rc;
    int rbytes;
    uint32_t len, rlen, llen, lrlen;
    int send_first=0;
    char *rport = NULL;
    char *my_name = NULL, *remote_name = NULL;
    orca_process_name_t rname;

    ompi_communicator_t *newcomp;
    char port_name[MPI_MAX_PORT_NAME];

    if ( MPI_PARAM_CHECK ) {
        OMPI_ERR_INIT_FINALIZE(FUNC_NAME);

        if ( NULL == intercomm ) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
                                          FUNC_NAME);
        }
    }

    OPAL_CR_ENTER_LIBRARY();

    /* open a port using the specified tag */
    if (OMPI_SUCCESS != (rc = ompi_dpm.open_port(port_name, OMPI_COMM_JOIN_TAG))) {
        OPAL_CR_EXIT_LIBRARY();
        return rc;
    }
    /* NOTE(review): the port opened above is not closed on the error paths
       below -- confirm whether ompi_dpm offers a close operation that
       should be called there. */

    /* send my process name */
    if (OMPI_SUCCESS != (rc = orca_process_info_get_name_as_string(&my_name,
                                                                   ORCA_PROC_MY_NAME))) {
        /* my_name is not touched here: nothing in this file shows what the
           callee leaves in it on failure. */
        *intercomm = MPI_COMM_NULL;
        OPAL_CR_EXIT_LIBRARY();
        return MPI_ERR_INTERN;
    }
    llen = (uint32_t)(strlen(my_name) + 1);
    len = htonl(llen);
    /* ompi_socket_send reports success as OMPI_SUCCESS, ompi_socket_recv as
       MPI_SUCCESS; each result is compared against its own constant. */
    if (OMPI_SUCCESS != ompi_socket_send( fd, (char *) &len, sizeof(uint32_t)) ||
        OMPI_SUCCESS != ompi_socket_send( fd, my_name, llen)) {
        goto fail;
    }
    free(my_name);
    my_name = NULL;

    /* recv the remote name: length first, then the text */
    if (MPI_SUCCESS != ompi_socket_recv(fd, (char *) &rlen, sizeof(uint32_t))) {
        /* rlen was not filled in; it must not be used as a size */
        goto fail;
    }
    lrlen = ntohl(rlen);
    /* The socket helpers take an int length.  Reject a length that is zero
       (a NUL-terminated string needs at least one byte) or that does not
       survive the conversion to int. */
    rbytes = (int) lrlen;
    if (rbytes <= 0) {
        goto fail;
    }
    remote_name = (char *)malloc(lrlen);
    if (NULL == remote_name ) {
        goto fail;
    }
    if (MPI_SUCCESS != ompi_socket_recv(fd, remote_name, rbytes)) {
        goto fail;
    }
    /* the name is parsed as a C string next, so it must be terminated */
    if ('\0' != remote_name[lrlen - 1]) {
        goto fail;
    }

    /* convert the remote name */
    if (OMPI_SUCCESS != (rc = orca_process_info_convert_string_to_name(&rname, remote_name))) {
        goto fail;
    }
    free(remote_name);
    remote_name = NULL;

    /* compare the two names to get send_first: the side with the lower
       jobid, or the lower vpid within the same job, gets send_first = true */
    if (orca_process_info_get_jobid(ORCA_PROC_MY_NAME) == rname.jobid) {
        if (orca_process_info_get_vpid(ORCA_PROC_MY_NAME) < rname.vpid) {
            send_first = true;
        } else if (orca_process_info_get_vpid(ORCA_PROC_MY_NAME) == rname.vpid) {
            /* joining to myself is not allowed */
            goto fail;
        } else {
            send_first = false;
        }
    } else if (orca_process_info_get_jobid(ORCA_PROC_MY_NAME) < rname.jobid) {
        send_first = true;
    }

    /* sendrecv port-name through the socket connection.
       Need to determine somehow how to avoid a potential deadlock
       here. */
    llen = (uint32_t)(strlen(port_name)+1);
    len = htonl(llen);

    if (OMPI_SUCCESS != ompi_socket_send( fd, (char *) &len, sizeof(uint32_t))) {
        goto fail;
    }
    if (MPI_SUCCESS != ompi_socket_recv (fd, (char *) &rlen, sizeof(uint32_t))) {
        goto fail;
    }

    lrlen = ntohl(rlen);
    /* same length validation as for the remote name above */
    rbytes = (int) lrlen;
    if (rbytes <= 0) {
        goto fail;
    }
    rport = (char *) malloc (lrlen);
    if ( NULL == rport ) {
        goto fail;
    }

    /* Assumption: socket_send should not block, even if the socket
       is not configured to be non-blocking, because the message length are
       so short. */
    if (OMPI_SUCCESS != ompi_socket_send (fd, port_name, llen)) {
        goto fail;
    }
    if (MPI_SUCCESS != ompi_socket_recv (fd, rport, rbytes)) {
        goto fail;
    }
    /* the port name is handed on as a C string, so it must be terminated */
    if ('\0' != rport[lrlen - 1]) {
        goto fail;
    }

    /* use the port we received to connect/accept */
    rc = ompi_dpm.connect_accept (MPI_COMM_SELF, 0, rport, send_first, &newcomp);

    free ( rport );

    *intercomm = newcomp;
    OMPI_ERRHANDLER_RETURN (rc, MPI_COMM_SELF, rc, FUNC_NAME);

 fail:
    /* Common exit for internal failures.  Pointers that are not currently
       owned are NULL, and free(NULL) is a no-op. */
    free(my_name);
    free(remote_name);
    free(rport);
    *intercomm = MPI_COMM_NULL;
    OPAL_CR_EXIT_LIBRARY();
    return MPI_ERR_INTERN;
}
|
2004-08-08 23:20:19 +04:00
|
|
|
|
|
|
|
|
|
|
|
static int ompi_socket_send (int fd, char *buf, int len )
|
|
|
|
{
|
2005-02-09 19:25:38 +03:00
|
|
|
int num;
|
2004-08-08 23:20:19 +04:00
|
|
|
size_t s_num;
|
|
|
|
ssize_t a;
|
|
|
|
char *c_ptr;
|
|
|
|
int ret = OMPI_SUCCESS;
|
|
|
|
|
2006-10-20 07:57:44 +04:00
|
|
|
num = len;
|
|
|
|
c_ptr = buf;
|
2004-08-08 23:20:19 +04:00
|
|
|
|
|
|
|
do {
|
2007-09-13 18:00:59 +04:00
|
|
|
s_num = (size_t) num;
|
2004-08-08 23:20:19 +04:00
|
|
|
a = write ( fd, c_ptr, s_num );
|
|
|
|
if ( a == -1 ) {
|
|
|
|
if ( errno == EINTR ) {
|
|
|
|
/* Catch EINTR on, mainly on IBM RS6000 */
|
|
|
|
continue;
|
2007-09-13 18:00:59 +04:00
|
|
|
}
|
2004-08-08 23:20:19 +04:00
|
|
|
#ifdef __SR8000
|
|
|
|
else if ( errno == EWOULDBLOCK ) {
|
|
|
|
/*Catch EWOULDBLOCK on Hitachi SR8000 */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
else if ( errno == EAGAIN ) {
|
|
|
|
/* Catch EAGAIN on Hitachi SR8000 */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
else {
|
|
|
|
/* Another error occured */
|
2006-03-16 12:42:46 +03:00
|
|
|
fprintf (stderr,"ompi_socket_send: error while writing to socket"
|
2004-08-08 23:20:19 +04:00
|
|
|
" error:%s", strerror (errno) );
|
2006-03-16 12:42:46 +03:00
|
|
|
return MPI_ERR_OTHER;
|
2007-09-13 18:00:59 +04:00
|
|
|
}
|
|
|
|
}
|
2005-02-09 19:25:38 +03:00
|
|
|
num -= a;
|
|
|
|
c_ptr += a;
|
|
|
|
} while ( num > 0 );
|
2004-08-08 23:20:19 +04:00
|
|
|
|
|
|
|
|
2007-09-13 18:00:59 +04:00
|
|
|
if ( num < 0 ) {
|
|
|
|
fprintf (stderr, "ompi_socket_send: more data written then available");
|
|
|
|
ret = MPI_ERR_INTERN;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
2004-08-08 23:20:19 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static int ompi_socket_recv (int fd, char *buf, int len )
|
|
|
|
{
|
2005-02-09 19:25:38 +03:00
|
|
|
int num;
|
2004-08-08 23:20:19 +04:00
|
|
|
size_t s_num;
|
|
|
|
ssize_t a;
|
|
|
|
char *c_ptr;
|
2006-03-16 12:42:46 +03:00
|
|
|
int ret = MPI_SUCCESS;
|
2004-08-08 23:20:19 +04:00
|
|
|
|
2005-02-09 19:25:38 +03:00
|
|
|
num = len;
|
2004-08-08 23:20:19 +04:00
|
|
|
c_ptr = buf;
|
|
|
|
|
|
|
|
do {
|
2007-09-13 18:00:59 +04:00
|
|
|
s_num = (size_t ) num;
|
2004-08-08 23:20:19 +04:00
|
|
|
a = read ( fd, c_ptr, s_num );
|
|
|
|
if ( a == -1 ) {
|
|
|
|
if ( errno == EINTR ) {
|
|
|
|
/* Catch EINTR on, mainly on IBM RS6000 */
|
|
|
|
continue;
|
2007-09-13 18:00:59 +04:00
|
|
|
}
|
2004-08-08 23:20:19 +04:00
|
|
|
#ifdef __SR8000
|
|
|
|
else if ( errno == EWOULDBLOCK ) {
|
|
|
|
/*Catch EWOULDBLOCK on Hitachi SR8000 */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
else if ( errno == EAGAIN ) {
|
|
|
|
/* Catch EAGAIN on Hitachi SR8000 */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
else {
|
|
|
|
/* Another error occured */
|
2006-03-16 12:42:46 +03:00
|
|
|
fprintf (stderr,"ompi_socket_recv: error while reading from socket"
|
2004-08-08 23:20:19 +04:00
|
|
|
" error:%s", strerror (errno) );
|
2006-03-16 12:42:46 +03:00
|
|
|
return MPI_ERR_OTHER;
|
2007-09-13 18:00:59 +04:00
|
|
|
}
|
|
|
|
}
|
2005-02-09 19:27:27 +03:00
|
|
|
num -= a;
|
|
|
|
c_ptr += a;
|
2005-02-09 19:25:38 +03:00
|
|
|
} while ( num > 0 );
|
2004-08-08 23:20:19 +04:00
|
|
|
|
2007-09-13 18:00:59 +04:00
|
|
|
if ( num < 0 ) {
|
|
|
|
fprintf (stderr, "ompi_socket_recv: more data read then available");
|
|
|
|
ret = MPI_ERR_INTERN;
|
|
|
|
}
|
2004-08-08 23:20:19 +04:00
|
|
|
|
2007-09-13 18:00:59 +04:00
|
|
|
return ret;
|
2004-08-08 23:20:19 +04:00
|
|
|
}
|