* Fix a number of environment variable naming issues in the cofs code. This
  really needs to use the param registry code
* make mpirun use -f -n to start processes - seems to make ssh happier
* update oob test to go through the mca layer

This commit was SVN r663.
This commit is contained in:
Parent
4d601421b2
Commit
4c9d393a0e
@ -110,7 +110,7 @@ mca_oob_cofs_init(int *priority, bool *allow_multi_user_threads,
|
||||
* BWB - fix me, make register the "right" way...
|
||||
*/
|
||||
/* find our vpid */
|
||||
tmp = getenv("MCA_OOB_BASE_VPID");
|
||||
tmp = getenv("MCA_common_lam_cofs_my_vpid");
|
||||
if (tmp == NULL) {
|
||||
printf("oob_cofs can not find vpid\n");
|
||||
return NULL;
|
||||
|
@ -101,5 +101,7 @@ typedef mca_oob_1_0_0_t mca_oob_t;
|
||||
/* oob v1.0 */ \
|
||||
"oob", 1, 0, 0
|
||||
|
||||
extern mca_oob_t mca_oob;
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -58,6 +58,8 @@
|
||||
#include "mca/mca.h"
|
||||
#include "lam/types.h"
|
||||
|
||||
#include <sys/param.h>
|
||||
|
||||
/*
|
||||
* "PCM" global types
|
||||
*/
|
||||
@ -66,6 +68,7 @@
|
||||
#define LAM_PCM_PROC_OTHER 2
|
||||
|
||||
struct mca_pcm_rte_node_t {
|
||||
char name[MAXHOSTNAMELEN];
|
||||
int32_t node_num;
|
||||
int32_t num_procs;
|
||||
};
|
||||
|
@ -108,7 +108,7 @@ mca_registry_cofs_init(int *priority, bool *allow_multi_user_threads,
|
||||
* BWB - fix me, make register the "right" way...
|
||||
*/
|
||||
/* find our vpid */
|
||||
tmp = getenv("MCA_REGISTRY_BASE_VPID");
|
||||
tmp = getenv("MCA_common_lam_cofs_my_vpid");
|
||||
if (tmp == NULL) {
|
||||
printf("registry_cofs can not find vpid\n");
|
||||
return NULL;
|
||||
|
@ -91,5 +91,4 @@ fi
|
||||
# Spawn and run
|
||||
#
|
||||
######################################################################
|
||||
exec $@ &
|
||||
exit 0
|
||||
exec $@
|
||||
|
@ -116,7 +116,7 @@ start_proc() {
|
||||
local argv="$*"
|
||||
local rmt_boot="${lam_mydir}/mpiboot"
|
||||
|
||||
local cmd="ssh ${rmt_hostname} ${rmt_boot} -myvpid ${rmt_vpid}"
|
||||
local cmd="ssh ${rmt_hostname} -f -n ${rmt_boot} -myvpid ${rmt_vpid}"
|
||||
cmd="${cmd} -numprocs ${lam_numprocs} -jobhandle ${lam_jobhandle}"
|
||||
cmd="${cmd} -pwd ${lam_cwd}"
|
||||
if test ! -z "${MCA_common_lam_cofs_comm_dir}" ; then
|
||||
@ -193,7 +193,7 @@ launch_procs() {
|
||||
# Do the prep work
|
||||
#
|
||||
######################################################################
|
||||
lam_jobhandle="pcm_cofs_job_handle_${$}_0"
|
||||
lam_jobhandle="pcm-cofs-job-handle-${$}-0"
|
||||
|
||||
|
||||
######################################################################
|
||||
|
@ -5,15 +5,9 @@
|
||||
|
||||
include $(top_srcdir)/config/Makefile.options
|
||||
|
||||
noinst_PROGRAMS = oob_cofs_test pcm_cofs_test
|
||||
noinst_PROGRAMS = oob_cofs_test
|
||||
|
||||
oob_cofs_test_SOURCES = oob_cofs_test.c
|
||||
oob_cofs_test_LDADD = \
|
||||
../../../../src/mca/lam/oob/cofs/libmca_lam_oob_cofs.la \
|
||||
../../../../src/liblam.la
|
||||
|
||||
pcm_cofs_test_SOURCES = pcm_cofs_test.c
|
||||
pcm_cofs_test_LDADD = \
|
||||
../../../../src/mca/lam/pcm/cofs/libmca_lam_pcm_cofs.la \
|
||||
../../../../src/mca/lam/oob/cofs/libmca_lam_oob_cofs.la \
|
||||
../../../../src/liblam.la
|
||||
|
@ -1,74 +1,92 @@
|
||||
#include "lam/runtime/runtime.h"
|
||||
#include "mca/lam/oob/oob.h"
|
||||
#include "mca/lam/oob/cofs/src/oob_cofs.h"
|
||||
#include "mca/lam/pcm/pcm.h"
|
||||
#include "mca/lam/base/base.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
int
|
||||
main(int argc, char* argv[])
|
||||
{
|
||||
int ret;
|
||||
char *tmp;
|
||||
struct mca_oob_1_0_0_t *init_ret;
|
||||
int target_vpid, source_vpid, source_tag;
|
||||
size_t source_len;
|
||||
char *source_data;
|
||||
char buffer[2048];
|
||||
int msg_count = 0;
|
||||
int priority = 0;
|
||||
bool allow_threads = false;
|
||||
bool have_hidden_threads = false;
|
||||
mca_pcm_proc_t *procs;
|
||||
size_t nprocs;
|
||||
mca_pcm_proc_t *me;
|
||||
int left_vpid, right_vpid, me_vpid;
|
||||
int count = 2;
|
||||
lam_job_handle_t job;
|
||||
int data = 0xDEADBEEF;
|
||||
int tag = MCA_OOB_ANY_TAG;
|
||||
bool threads, hidden;
|
||||
|
||||
printf("hello, world!\n");
|
||||
ret = lam_init(argc, argv);
|
||||
assert(ret == LAM_SUCCESS);
|
||||
|
||||
if (argc != 3) {
|
||||
printf("usage: %s my_vpid target_vpid\n", argv[0]);
|
||||
exit(1);
|
||||
ret = mca_base_open();
|
||||
assert(ret == LAM_SUCCESS);
|
||||
|
||||
ret = lam_rte_init(&threads, &hidden);
|
||||
assert(ret == LAM_SUCCESS);
|
||||
|
||||
ret = mca_pcm.pcm_proc_startup();
|
||||
assert(ret == LAM_SUCCESS);
|
||||
|
||||
ret = mca_pcm.pcm_proc_get_peers(&procs, &nprocs);
|
||||
assert(ret == LAM_SUCCESS);
|
||||
|
||||
job = mca_pcm.pcm_handle_get();
|
||||
assert(job != NULL);
|
||||
|
||||
me = mca_pcm.pcm_proc_get_me();
|
||||
assert(me != NULL);
|
||||
|
||||
/* time to play the ring game! */
|
||||
me_vpid = me->vpid;
|
||||
printf("Hello, World. I am vpid %d\n", me_vpid);
|
||||
|
||||
left_vpid = me_vpid == 0 ? nprocs - 1 : me_vpid - 1;
|
||||
right_vpid = (me_vpid + 1) % nprocs;
|
||||
|
||||
if (me_vpid == 0) {
|
||||
printf("vpid %d sending to vpid %d\n", me_vpid, right_vpid);
|
||||
ret = mca_oob.oob_send(job, right_vpid, 0, &data, sizeof(int));
|
||||
assert(ret == LAM_SUCCESS);
|
||||
count--;
|
||||
}
|
||||
|
||||
tmp = malloc(strlen("MCA_OOB_BASE_VPID") + strlen(argv[1]) + 2);
|
||||
sprintf(tmp, "MCA_OOB_BASE_VPID=%s", argv[1]);
|
||||
putenv(tmp);
|
||||
while (count > 0) {
|
||||
int *data_ptr;
|
||||
size_t data_ptr_len;
|
||||
printf("vpid %d recving from vpid %d\n", me_vpid, left_vpid);
|
||||
ret = mca_oob.oob_recv(job, left_vpid, &tag, &data_ptr, &data_ptr_len);
|
||||
assert(ret == LAM_SUCCESS);
|
||||
assert(data_ptr_len == sizeof(int));
|
||||
assert(*data_ptr == data);
|
||||
|
||||
target_vpid = atoi(argv[2]);
|
||||
|
||||
ret = mca_oob_cofs_open();
|
||||
if (ret != LAM_SUCCESS) {
|
||||
printf("mca_oob_cofs_open returned %d\n", ret);
|
||||
exit(1);
|
||||
printf("vpid %d sending to vpid %d\n", me_vpid, right_vpid);
|
||||
ret = mca_oob.oob_send(job, right_vpid, 0, &data, sizeof(int));
|
||||
assert(ret == LAM_SUCCESS);
|
||||
|
||||
count--;
|
||||
}
|
||||
|
||||
init_ret = mca_oob_cofs_init(&priority, &allow_threads, &have_hidden_threads);
|
||||
if (init_ret == NULL) {
|
||||
printf("mca_oob_cofs_init returned NULL\n");
|
||||
exit(1);
|
||||
} else {
|
||||
printf("mca_oob_cofs_query said \"go\" with priority %d\n", priority);
|
||||
|
||||
if (me_vpid == 0) {
|
||||
int *data_ptr;
|
||||
size_t data_ptr_len;
|
||||
printf("vpid %d recving from vpid %d\n", me_vpid, left_vpid);
|
||||
ret = mca_oob.oob_recv(job, left_vpid, &tag, &data_ptr, &data_ptr_len);
|
||||
assert(ret == LAM_SUCCESS);
|
||||
assert(data_ptr_len == sizeof(int));
|
||||
assert(*data_ptr == data);
|
||||
}
|
||||
|
||||
printf("#\n# Sending Messages\n#\n\n");
|
||||
for (msg_count = 0 ; msg_count < 20 ; ++msg_count) {
|
||||
sprintf(buffer, "%s's message number %d\n", argv[1], msg_count);
|
||||
printf("%d %d: %s\n", target_vpid, 1, buffer);
|
||||
ret = mca_oob_cofs_send("foobar", target_vpid, 1, buffer, strlen(buffer) + 1);
|
||||
if (ret != LAM_SUCCESS) {
|
||||
printf("mca_oob_cofs_send failed on msg_count %d\n", msg_count);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
ret = lam_rte_finalize();
|
||||
assert(ret == LAM_SUCCESS);
|
||||
|
||||
printf("#\n# Receiving Messages\n#\n\n");
|
||||
for (msg_count = 0 ; msg_count < 20 ; ++msg_count) {
|
||||
source_tag = 1;
|
||||
ret = mca_oob_cofs_recv("foobar", target_vpid, &source_tag, (void**) &source_data, &source_len);
|
||||
if (ret != LAM_SUCCESS) {
|
||||
printf("mca_oob_cofs_recv failed on msg_count %d, %d\n", msg_count, ret);
|
||||
exit(1);
|
||||
}
|
||||
printf("%d %d: %s\n", source_vpid, source_tag, source_data);
|
||||
free(source_data);
|
||||
}
|
||||
|
||||
printf("#\n# Finished\n#\n\n");
|
||||
return 0;
|
||||
}
|
||||
|
@ -1,66 +0,0 @@
|
||||
#include "lam_config.h"
|
||||
|
||||
#include "lam/constants.h"
|
||||
#include "mca/lam/oob/oob.h"
|
||||
#include "mca/lam/oob/cofs/src/oob_cofs.h"
|
||||
#include "mca/lam/pcm/pcm.h"
|
||||
#include "mca/lam/pcm/cofs/src/pcm_cofs.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
int ret;
|
||||
struct mca_pcm_1_0_0_t* modret;
|
||||
mca_pcm_proc_t *procs;
|
||||
size_t nprocs;
|
||||
mca_pcm_proc_t *my_proc;
|
||||
int priority = 0;
|
||||
bool allow_threads = false;
|
||||
bool have_hidden_threads = false;
|
||||
|
||||
modret = mca_pcm_cofs_init(&priority, &allow_threads, &have_hidden_threads);
|
||||
if (modret == NULL) {
|
||||
printf("failed to init PCM module. Aborting.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Do all the stuff that a pcm module user would do
|
||||
*/
|
||||
ret = mca_pcm_cofs_proc_startup();
|
||||
if (ret != MPI_SUCCESS) {
|
||||
printf("Failed in cofs_startup() with retcode: %d\n", ret);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
ret = mca_pcm_cofs_proc_get_peers(&procs, &nprocs);
|
||||
if (ret != MPI_SUCCESS) {
|
||||
printf("Failed in cofs_proc_get_peers() with retcode %d\n", ret);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
my_proc = mca_pcm_cofs_proc_get_me();
|
||||
if (my_proc == NULL) {
|
||||
printf("Failed to get my proc entry\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
printf("Hello, World, I am vpid %d of job %s\n", my_proc->vpid, my_proc->job_handle);
|
||||
|
||||
/*
|
||||
* from here, we can bring up the OOB interface (if it isn't already) and be
|
||||
* ready to run...
|
||||
*/
|
||||
|
||||
/*
|
||||
* Clean up after ourselves - normall that mca interface would do that for us...
|
||||
*/
|
||||
mca_pcm_cofs_close();
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
x
Reference in new issue
Block a user