1
1
openmpi/orte/test/mpi/add_host.c
Ralph Castain 75eb56522c Continue resolving add_host behavior
Fix a problem in packing/unpacking job updates. There remains a race condition that causes messages to attempt to be sent to the second new daemon before it is completely ready. Not entirely sure where it is coming from.

Refs #4665

Rebase to master. Reset orte_nidmap_communicated if hosts are added. Check for duplicate hostnames in an add_host command. Turn off tree_spawn for dynamic launch of additional daemons.

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
2018-01-15 08:21:01 -08:00

84 строки
2.8 KiB
C

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/param.h>
#include <mpi.h>
/*
 * Test program for MPI_Comm_spawn with the "add-host" info key.
 *
 * Every process prints a start-up banner and initializes MPI.  A process
 * with no parent communicator acts as the parent: it spawns NUM_CHILDREN
 * copies of this same executable twice, each time with a different
 * "add-host" value, sends one integer from its rank 0 to rank 0 of the
 * spawned job, and disconnects.  A process that does have a parent
 * communicator acts as a child: its rank 0 receives the integer, then all
 * children disconnect from the parent.
 *
 * NOTE(review): the host names "rhc002" and "rhc003" are hard-coded;
 * presumably they are specific to the original author's test cluster and
 * must be edited to run elsewhere.
 *
 * Returns 0 on success, or the MPI error code if a spawn call reports
 * failure.
 */
int main(int argc, char* argv[])
{
    /* Named constants for the values shared by parent and child paths. */
    enum { NUM_CHILDREN = 3, MSG_TAG = 1, MSG_VALUE = 38 };

    int msg, rc;
    MPI_Comm parent, child;
    int rank, size;
    char hostname[MAXHOSTNAMELEN];
    pid_t pid;
    const char *env_rank, *env_nspace;
    MPI_Info info;

    (void)argc;

    /* getenv() returns NULL when the variable is unset, and passing NULL
     * to "%s" is undefined behavior, so substitute a printable marker. */
    env_rank = getenv("PMIX_RANK");
    if (NULL == env_rank) {
        env_rank = "(null)";
    }
    env_nspace = getenv("PMIX_NAMESPACE");
    if (NULL == env_nspace) {
        env_nspace = "(null)";
    }

    pid = getpid();

    /* gethostname() may fail, and on truncation it is unspecified whether
     * the buffer is NUL-terminated; make the buffer safe to print. */
    if (0 != gethostname(hostname, sizeof(hostname))) {
        snprintf(hostname, sizeof(hostname), "%s", "unknown");
    }
    hostname[sizeof(hostname) - 1] = '\0';

    printf("[%s:%s pid %ld] starting up on node %s!\n",
           env_nspace, env_rank, (long)pid, hostname);

    MPI_Init(NULL, NULL);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    printf("%d completed MPI_Init\n", rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_get_parent(&parent);

    /* If we get COMM_NULL back, then we're the parent */
    if (MPI_COMM_NULL == parent) {
        printf("Parent [pid %ld] about to spawn!\n", (long)pid);

        MPI_Info_create(&info);
        MPI_Info_set(info, "add-host", "rhc002:24");
        rc = MPI_Comm_spawn(argv[0], MPI_ARGV_NULL, NUM_CHILDREN, info,
                            0, MPI_COMM_WORLD, &child, MPI_ERRCODES_IGNORE);
        if (MPI_SUCCESS != rc) {
            printf("Child failed to spawn\n");
            MPI_Info_free(&info);
            return rc;
        }
        printf("Parent done with spawn\n");
        if (0 == rank) {
            msg = MSG_VALUE;
            printf("Parent sending message to child\n");
            MPI_Send(&msg, 1, MPI_INT, 0, MSG_TAG, child);
        }
        MPI_Comm_disconnect(&child);
        printf("Parent disconnected\n");

        /* do it again, this time with a different "add-host" value */
        MPI_Info_set(info, "add-host", "rhc003:24");
        rc = MPI_Comm_spawn(argv[0], MPI_ARGV_NULL, NUM_CHILDREN, info,
                            0, MPI_COMM_WORLD, &child, MPI_ERRCODES_IGNORE);
        if (MPI_SUCCESS != rc) {
            printf("Child failed to spawn\n");
            MPI_Info_free(&info);
            return rc;
        }
        /* The info object is not used after the second spawn. */
        MPI_Info_free(&info);

        printf("Parent done with second spawn\n");
        if (0 == rank) {
            msg = MSG_VALUE;
            printf("Parent sending message to second children\n");
            MPI_Send(&msg, 1, MPI_INT, 0, MSG_TAG, child);
        }
        MPI_Comm_disconnect(&child);
        printf("Parent disconnected again\n");
    }
    /* Otherwise, we're the child */
    else {
        /* rank and size were already queried above; report the actual
         * size of this job rather than a hard-coded count. */
        printf("Hello from the child %d of %d on host %s pid %ld\n",
               rank, size, hostname, (long)pid);
        if (0 == rank) {
            MPI_Recv(&msg, 1, MPI_INT, 0, MSG_TAG, parent, MPI_STATUS_IGNORE);
            printf("Child %d received msg: %d\n", rank, msg);
        }
        MPI_Comm_disconnect(&parent);
        printf("Child %d disconnected\n", rank);
    }

    MPI_Finalize();
    /* pid_t has no portable printf conversion; cast as the other prints do. */
    fprintf(stderr, "%ld: exiting\n", (long)pid);
    return 0;
}