1
1

* Fix a number of environment variable naming issues in the cofs code. This
  really needs to use the param registry code
* make mpirun pass -f -n to ssh when starting processes - seems to make ssh happier
* update oob test to go through the mca layer

This commit was SVN r663.
Этот коммит содержится в:
Brian Barrett 2004-02-08 02:09:17 +00:00
родитель 4d601421b2
Коммит 4c9d393a0e
9 изменённых файлов: 81 добавлений и 131 удалений

Просмотреть файл

@ -110,7 +110,7 @@ mca_oob_cofs_init(int *priority, bool *allow_multi_user_threads,
* BWB - fix me, make register the "right" way... * BWB - fix me, make register the "right" way...
*/ */
/* find our vpid */ /* find our vpid */
tmp = getenv("MCA_OOB_BASE_VPID"); tmp = getenv("MCA_common_lam_cofs_my_vpid");
if (tmp == NULL) { if (tmp == NULL) {
printf("oob_cofs can not find vpid\n"); printf("oob_cofs can not find vpid\n");
return NULL; return NULL;

Просмотреть файл

@ -101,5 +101,7 @@ typedef mca_oob_1_0_0_t mca_oob_t;
/* oob v1.0 */ \ /* oob v1.0 */ \
"oob", 1, 0, 0 "oob", 1, 0, 0
extern mca_oob_t mca_oob;
#endif #endif

Просмотреть файл

@ -58,6 +58,8 @@
#include "mca/mca.h" #include "mca/mca.h"
#include "lam/types.h" #include "lam/types.h"
#include <sys/param.h>
/* /*
* "PCM" global types * "PCM" global types
*/ */
@ -66,6 +68,7 @@
#define LAM_PCM_PROC_OTHER 2 #define LAM_PCM_PROC_OTHER 2
struct mca_pcm_rte_node_t { struct mca_pcm_rte_node_t {
char name[MAXHOSTNAMELEN];
int32_t node_num; int32_t node_num;
int32_t num_procs; int32_t num_procs;
}; };

Просмотреть файл

@ -108,7 +108,7 @@ mca_registry_cofs_init(int *priority, bool *allow_multi_user_threads,
* BWB - fix me, make register the "right" way... * BWB - fix me, make register the "right" way...
*/ */
/* find our vpid */ /* find our vpid */
tmp = getenv("MCA_REGISTRY_BASE_VPID"); tmp = getenv("MCA_common_lam_cofs_my_vpid");
if (tmp == NULL) { if (tmp == NULL) {
printf("registry_cofs can not find vpid\n"); printf("registry_cofs can not find vpid\n");
return NULL; return NULL;

Просмотреть файл

@ -91,5 +91,4 @@ fi
# Spawn and run # Spawn and run
# #
###################################################################### ######################################################################
exec $@ & exec $@
exit 0

Просмотреть файл

@ -116,7 +116,7 @@ start_proc() {
local argv="$*" local argv="$*"
local rmt_boot="${lam_mydir}/mpiboot" local rmt_boot="${lam_mydir}/mpiboot"
local cmd="ssh ${rmt_hostname} ${rmt_boot} -myvpid ${rmt_vpid}" local cmd="ssh ${rmt_hostname} -f -n ${rmt_boot} -myvpid ${rmt_vpid}"
cmd="${cmd} -numprocs ${lam_numprocs} -jobhandle ${lam_jobhandle}" cmd="${cmd} -numprocs ${lam_numprocs} -jobhandle ${lam_jobhandle}"
cmd="${cmd} -pwd ${lam_cwd}" cmd="${cmd} -pwd ${lam_cwd}"
if test ! -z "${MCA_common_lam_cofs_comm_dir}" ; then if test ! -z "${MCA_common_lam_cofs_comm_dir}" ; then
@ -193,7 +193,7 @@ launch_procs() {
# Do the prep work # Do the prep work
# #
###################################################################### ######################################################################
lam_jobhandle="pcm_cofs_job_handle_${$}_0" lam_jobhandle="pcm-cofs-job-handle-${$}-0"
###################################################################### ######################################################################

Просмотреть файл

@ -5,15 +5,9 @@
include $(top_srcdir)/config/Makefile.options include $(top_srcdir)/config/Makefile.options
noinst_PROGRAMS = oob_cofs_test pcm_cofs_test noinst_PROGRAMS = oob_cofs_test
oob_cofs_test_SOURCES = oob_cofs_test.c oob_cofs_test_SOURCES = oob_cofs_test.c
oob_cofs_test_LDADD = \ oob_cofs_test_LDADD = \
../../../../src/mca/lam/oob/cofs/libmca_lam_oob_cofs.la \ ../../../../src/mca/lam/oob/cofs/libmca_lam_oob_cofs.la \
../../../../src/liblam.la ../../../../src/liblam.la
pcm_cofs_test_SOURCES = pcm_cofs_test.c
pcm_cofs_test_LDADD = \
../../../../src/mca/lam/pcm/cofs/libmca_lam_pcm_cofs.la \
../../../../src/mca/lam/oob/cofs/libmca_lam_oob_cofs.la \
../../../../src/liblam.la

Просмотреть файл

@ -1,74 +1,92 @@
#include "lam/runtime/runtime.h"
#include "mca/lam/oob/oob.h" #include "mca/lam/oob/oob.h"
#include "mca/lam/oob/cofs/src/oob_cofs.h" #include "mca/lam/pcm/pcm.h"
#include "mca/lam/base/base.h"
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <assert.h>
int int
main(int argc, char* argv[]) main(int argc, char* argv[])
{ {
int ret; int ret;
char *tmp; mca_pcm_proc_t *procs;
struct mca_oob_1_0_0_t *init_ret; size_t nprocs;
int target_vpid, source_vpid, source_tag; mca_pcm_proc_t *me;
size_t source_len; int left_vpid, right_vpid, me_vpid;
char *source_data; int count = 2;
char buffer[2048]; lam_job_handle_t job;
int msg_count = 0; int data = 0xDEADBEEF;
int priority = 0; int tag = MCA_OOB_ANY_TAG;
bool allow_threads = false; bool threads, hidden;
bool have_hidden_threads = false;
printf("hello, world!\n");
ret = lam_init(argc, argv);
assert(ret == LAM_SUCCESS);
if (argc != 3) { ret = mca_base_open();
printf("usage: %s my_vpid target_vpid\n", argv[0]); assert(ret == LAM_SUCCESS);
exit(1);
ret = lam_rte_init(&threads, &hidden);
assert(ret == LAM_SUCCESS);
ret = mca_pcm.pcm_proc_startup();
assert(ret == LAM_SUCCESS);
ret = mca_pcm.pcm_proc_get_peers(&procs, &nprocs);
assert(ret == LAM_SUCCESS);
job = mca_pcm.pcm_handle_get();
assert(job != NULL);
me = mca_pcm.pcm_proc_get_me();
assert(me != NULL);
/* time to play the ring game! */
me_vpid = me->vpid;
printf("Hello, World. I am vpid %d\n", me_vpid);
left_vpid = me_vpid == 0 ? nprocs - 1 : me_vpid - 1;
right_vpid = (me_vpid + 1) % nprocs;
if (me_vpid == 0) {
printf("vpid %d sending to vpid %d\n", me_vpid, right_vpid);
ret = mca_oob.oob_send(job, right_vpid, 0, &data, sizeof(int));
assert(ret == LAM_SUCCESS);
count--;
} }
tmp = malloc(strlen("MCA_OOB_BASE_VPID") + strlen(argv[1]) + 2); while (count > 0) {
sprintf(tmp, "MCA_OOB_BASE_VPID=%s", argv[1]); int *data_ptr;
putenv(tmp); size_t data_ptr_len;
printf("vpid %d recving from vpid %d\n", me_vpid, left_vpid);
ret = mca_oob.oob_recv(job, left_vpid, &tag, &data_ptr, &data_ptr_len);
assert(ret == LAM_SUCCESS);
assert(data_ptr_len == sizeof(int));
assert(*data_ptr == data);
target_vpid = atoi(argv[2]); printf("vpid %d sending to vpid %d\n", me_vpid, right_vpid);
ret = mca_oob.oob_send(job, right_vpid, 0, &data, sizeof(int));
assert(ret == LAM_SUCCESS);
ret = mca_oob_cofs_open(); count--;
if (ret != LAM_SUCCESS) {
printf("mca_oob_cofs_open returned %d\n", ret);
exit(1);
} }
init_ret = mca_oob_cofs_init(&priority, &allow_threads, &have_hidden_threads);
if (init_ret == NULL) { if (me_vpid == 0) {
printf("mca_oob_cofs_init returned NULL\n"); int *data_ptr;
exit(1); size_t data_ptr_len;
} else { printf("vpid %d recving from vpid %d\n", me_vpid, left_vpid);
printf("mca_oob_cofs_query said \"go\" with priority %d\n", priority); ret = mca_oob.oob_recv(job, left_vpid, &tag, &data_ptr, &data_ptr_len);
assert(ret == LAM_SUCCESS);
assert(data_ptr_len == sizeof(int));
assert(*data_ptr == data);
} }
printf("#\n# Sending Messages\n#\n\n"); ret = lam_rte_finalize();
for (msg_count = 0 ; msg_count < 20 ; ++msg_count) { assert(ret == LAM_SUCCESS);
sprintf(buffer, "%s's message number %d\n", argv[1], msg_count);
printf("%d %d: %s\n", target_vpid, 1, buffer);
ret = mca_oob_cofs_send("foobar", target_vpid, 1, buffer, strlen(buffer) + 1);
if (ret != LAM_SUCCESS) {
printf("mca_oob_cofs_send failed on msg_count %d\n", msg_count);
exit(1);
}
}
printf("#\n# Receiving Messages\n#\n\n");
for (msg_count = 0 ; msg_count < 20 ; ++msg_count) {
source_tag = 1;
ret = mca_oob_cofs_recv("foobar", target_vpid, &source_tag, (void**) &source_data, &source_len);
if (ret != LAM_SUCCESS) {
printf("mca_oob_cofs_recv failed on msg_count %d, %d\n", msg_count, ret);
exit(1);
}
printf("%d %d: %s\n", source_vpid, source_tag, source_data);
free(source_data);
}
printf("#\n# Finished\n#\n\n");
return 0; return 0;
} }

Просмотреть файл

@ -1,66 +0,0 @@
#include "lam_config.h"
#include "lam/constants.h"
#include "mca/lam/oob/oob.h"
#include "mca/lam/oob/cofs/src/oob_cofs.h"
#include "mca/lam/pcm/pcm.h"
#include "mca/lam/pcm/cofs/src/pcm_cofs.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Smoke test for the cofs PCM module.
 *
 * Initialises the module, then makes the same sequence of calls a PCM
 * module user would make (process start-up, peer lookup, self lookup),
 * prints a greeting and closes the module again.
 *
 * Each failing step prints a diagnostic and terminates the program with
 * exit status 1; a run in which every step succeeds returns 0.
 */
int
main(int argc, char *argv[])
{
    bool have_hidden_threads = false;
    bool allow_threads = false;
    int priority = 0;
    struct mca_pcm_1_0_0_t *module;
    mca_pcm_proc_t *peers;
    mca_pcm_proc_t *self;
    size_t num_peers;
    int rc;

    /* Bring the module up; a NULL result means it could not be initialised. */
    module = mca_pcm_cofs_init(&priority, &allow_threads, &have_hidden_threads);
    if (!module) {
        printf("failed to init PCM module.  Aborting.\n");
        exit(1);
    }

    /*
     * Walk through the calls a pcm module user would issue.
     */
    rc = mca_pcm_cofs_proc_startup();
    if (MPI_SUCCESS != rc) {
        printf("Failed in cofs_startup() with retcode: %d\n", rc);
        exit(1);
    }

    /* The peer list is fetched only to exercise the call; it is not used. */
    rc = mca_pcm_cofs_proc_get_peers(&peers, &num_peers);
    if (MPI_SUCCESS != rc) {
        printf("Failed in cofs_proc_get_peers() with retcode %d\n", rc);
        exit(1);
    }

    self = mca_pcm_cofs_proc_get_me();
    if (!self) {
        printf("Failed to get my proc entry\n");
        exit(1);
    }

    printf("Hello, World, I am vpid %d of job %s\n", self->vpid, self->job_handle);

    /*
     * At this point the OOB interface could be brought up (if it is not
     * up already) and the process would be ready to run.
     */

    /*
     * Clean up after ourselves - normally the mca interface would do
     * that for us.
     */
    mca_pcm_cofs_close();

    return 0;
}