1
1

Fix add-host support by including the location for procs of prior jobs when spawning new daemons.

Thanks to CalugaruVaxile for the report

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
This commit is contained in:
Ralph Castain 2017-12-07 14:47:58 -08:00
parent 0d1c58853b
commit 4316213805
5 changed files with 113 additions and 3 deletions

1
.gitignore vendored
View file

@ -435,6 +435,7 @@ orte/test/mpi/badcoll
orte/test/mpi/iof
orte/test/mpi/no-disconnect
orte/test/mpi/nonzero
orte/test/mpi/add_host
orte/test/system/radix
orte/test/system/sigusr_trap

View file

@ -117,9 +117,10 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer,
void *nptr;
uint32_t key;
char *nidmap;
orte_proc_t *dmn;
orte_proc_t *dmn, *proc;
opal_value_t *val = NULL, *kv;
opal_list_t *modex;
int n;
/* get the job data pointer */
if (NULL == (jdata = orte_get_job_data_object(job))) {
@ -282,6 +283,17 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer,
OBJ_DESTRUCT(&jobdata);
return rc;
}
/* pack the location of each proc */
for (n=0; n < jptr->procs->size; n++) {
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jptr->procs, n))) {
continue;
}
if (ORTE_SUCCESS != (rc = opal_dss.pack(&jobdata, &proc->parent, 1, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&jobdata);
return rc;
}
}
++numjobs;
}
rc = opal_hash_table_get_next_key_uint32(orte_job_data, &key, (void **)&jptr, nptr, &nptr);
@ -355,6 +367,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
orte_std_cntr_t cnt;
orte_job_t *jdata=NULL, *daemons;
orte_node_t *node;
orte_vpid_t dmnvpid, v;
int32_t n, k;
opal_buffer_t *bptr;
orte_proc_t *pptr, *dmn;
@ -411,6 +424,31 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
/* yep - so we can drop this copy */
jdata->jobid = ORTE_JOBID_INVALID;
OBJ_RELEASE(jdata);
continue;
}
/* unpack the location of each proc in this job */
for (v=0; v < jdata->num_procs; v++) {
if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, v))) {
pptr = OBJ_NEW(orte_proc_t);
pptr->name.jobid = jdata->jobid;
pptr->name.vpid = v;
opal_pointer_array_set_item(jdata->procs, v, pptr);
}
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(bptr, &dmnvpid, &cnt, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(jdata);
goto REPORT_ERROR;
}
/* lookup the daemon */
if (NULL == (dmn = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, dmnvpid))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
rc = ORTE_ERR_NOT_FOUND;
goto REPORT_ERROR;
}
/* connect the two */
OBJ_RETAIN(dmn->node);
pptr->node = dmn->node;
}
}
/* release the buffer */

View file

@ -173,6 +173,8 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata)
if (orte_display_allocation) {
orte_ras_base_display_alloc();
}
/* ensure we update the routing plan */
orte_routed.update_routing_plan(NULL);
/* progress the job */
caddy->jdata->state = ORTE_JOB_STATE_DAEMONS_REPORTED;
@ -1346,8 +1348,9 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
} else {
jdatorted->num_reported++;
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
"%s plm:base:orted_report_launch recvd %d of %d reported daemons",
"%s plm:base:orted_report_launch job %s recvd %d of %d reported daemons",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(jdatorted->jobid),
jdatorted->num_reported, jdatorted->num_procs));
if (jdatorted->num_procs == jdatorted->num_reported) {
bool dvm = true;

View file

@ -5,7 +5,7 @@ PROGS = mpi_no_op mpi_barrier hello hello_nodename abort multi_abort simple_spaw
parallel_w8 parallel_w64 parallel_r8 parallel_r64 sio sendrecv_blaster early_abort \
debugger singleton_client_server intercomm_create spawn_tree init-exit77 mpi_info \
info_spawn server client paccept pconnect ring hello.sapp binding badcoll attach xlib \
no-disconnect nonzero interlib pinterlib
no-disconnect nonzero interlib pinterlib add_host
all: $(PROGS)

68
orte/test/mpi/add_host.c Normal file
View file

@ -0,0 +1,68 @@
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/param.h>
#include <mpi.h>
/*
 * Test program for the "add-host" MPI_Info key on MPI_Comm_spawn:
 * the parent adds a new host to the allocation, spawns 3 children,
 * exchanges one message with child rank 0, and disconnects.
 */
int main(int argc, char* argv[])
{
    int msg, rc;
    MPI_Comm parent, child;
    int rank, size;
    char hostname[MAXHOSTNAMELEN];
    pid_t pid;
    char *env_rank, *env_nspace;
    MPI_Info info;

    /* grab the PMIx identity from the environment so we can report
     * who we are even before MPI_Init has completed */
    env_rank = getenv("PMIX_RANK");
    env_nspace = getenv("PMIX_NAMESPACE");
    pid = getpid();
    gethostname(hostname, sizeof(hostname));

    printf("[%s:%s pid %ld] starting up on node %s!\n", env_nspace, env_rank, (long)pid, hostname);

    MPI_Init(NULL, NULL);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    printf("%d completed MPI_Init\n", rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_get_parent(&parent);

    /* If we get COMM_NULL back, then we're the parent */
    if (MPI_COMM_NULL == parent) {
        pid = getpid();
        printf("Parent [pid %ld] about to spawn!\n", (long)pid);
        /* ask the RTE to add a host to the allocation before
         * mapping the child job */
        MPI_Info_create(&info);
        MPI_Info_set(info, "add-host", "rhc002:24");
        if (MPI_SUCCESS != (rc = MPI_Comm_spawn(argv[0], MPI_ARGV_NULL, 3, info,
                                                0, MPI_COMM_WORLD, &child, MPI_ERRCODES_IGNORE))) {
            printf("Child failed to spawn\n");
            return rc;
        }
        /* the info object is no longer needed once the spawn returns -
         * free it so the test itself doesn't leak */
        MPI_Info_free(&info);
        printf("Parent done with spawn\n");
        if (0 == rank) {
            msg = 38;
            printf("Parent sending message to child\n");
            MPI_Send(&msg, 1, MPI_INT, 0, 1, child);
        }
        MPI_Comm_disconnect(&child);
        printf("Parent disconnected\n");
    }
    /* Otherwise, we're the child */
    else {
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);
        pid = getpid();
        printf("Hello from the child %d of %d on host %s pid %ld\n", rank, 3, hostname, (long)pid);
        if (0 == rank) {
            MPI_Recv(&msg, 1, MPI_INT, 0, 1, parent, MPI_STATUS_IGNORE);
            printf("Child %d received msg: %d\n", rank, msg);
        }
        MPI_Comm_disconnect(&parent);
        printf("Child %d disconnected\n", rank);
    }
    MPI_Finalize();
    /* fixed: pid is pid_t, not int - printing it with %d is a
     * format/argument mismatch; cast to long and use %ld as done
     * everywhere else in this program */
    fprintf(stderr, "%ld: exiting\n", (long)pid);
    return 0;
}