* Fix a number of environment variable naming issues in the cofs code. This
  really needs to use the param registry code
* make mpirun use -f -n to start processes - seems to make ssh happier
* update oob test to go through the mca layer

This commit was SVN r663.
Этот коммит содержится в:
родитель
4d601421b2
Коммит
4c9d393a0e
@ -110,7 +110,7 @@ mca_oob_cofs_init(int *priority, bool *allow_multi_user_threads,
|
|||||||
* BWB - fix me, make register the "right" way...
|
* BWB - fix me, make register the "right" way...
|
||||||
*/
|
*/
|
||||||
/* find our vpid */
|
/* find our vpid */
|
||||||
tmp = getenv("MCA_OOB_BASE_VPID");
|
tmp = getenv("MCA_common_lam_cofs_my_vpid");
|
||||||
if (tmp == NULL) {
|
if (tmp == NULL) {
|
||||||
printf("oob_cofs can not find vpid\n");
|
printf("oob_cofs can not find vpid\n");
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -101,5 +101,7 @@ typedef mca_oob_1_0_0_t mca_oob_t;
|
|||||||
/* oob v1.0 */ \
|
/* oob v1.0 */ \
|
||||||
"oob", 1, 0, 0
|
"oob", 1, 0, 0
|
||||||
|
|
||||||
|
extern mca_oob_t mca_oob;
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -58,6 +58,8 @@
|
|||||||
#include "mca/mca.h"
|
#include "mca/mca.h"
|
||||||
#include "lam/types.h"
|
#include "lam/types.h"
|
||||||
|
|
||||||
|
#include <sys/param.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* "PCM" global types
|
* "PCM" global types
|
||||||
*/
|
*/
|
||||||
@ -66,6 +68,7 @@
|
|||||||
#define LAM_PCM_PROC_OTHER 2
|
#define LAM_PCM_PROC_OTHER 2
|
||||||
|
|
||||||
struct mca_pcm_rte_node_t {
|
struct mca_pcm_rte_node_t {
|
||||||
|
char name[MAXHOSTNAMELEN];
|
||||||
int32_t node_num;
|
int32_t node_num;
|
||||||
int32_t num_procs;
|
int32_t num_procs;
|
||||||
};
|
};
|
||||||
|
@ -108,7 +108,7 @@ mca_registry_cofs_init(int *priority, bool *allow_multi_user_threads,
|
|||||||
* BWB - fix me, make register the "right" way...
|
* BWB - fix me, make register the "right" way...
|
||||||
*/
|
*/
|
||||||
/* find our vpid */
|
/* find our vpid */
|
||||||
tmp = getenv("MCA_REGISTRY_BASE_VPID");
|
tmp = getenv("MCA_common_lam_cofs_my_vpid");
|
||||||
if (tmp == NULL) {
|
if (tmp == NULL) {
|
||||||
printf("registry_cofs can not find vpid\n");
|
printf("registry_cofs can not find vpid\n");
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -91,5 +91,4 @@ fi
|
|||||||
# Spawn and run
|
# Spawn and run
|
||||||
#
|
#
|
||||||
######################################################################
|
######################################################################
|
||||||
exec $@ &
|
exec $@
|
||||||
exit 0
|
|
||||||
|
@ -116,7 +116,7 @@ start_proc() {
|
|||||||
local argv="$*"
|
local argv="$*"
|
||||||
local rmt_boot="${lam_mydir}/mpiboot"
|
local rmt_boot="${lam_mydir}/mpiboot"
|
||||||
|
|
||||||
local cmd="ssh ${rmt_hostname} ${rmt_boot} -myvpid ${rmt_vpid}"
|
local cmd="ssh ${rmt_hostname} -f -n ${rmt_boot} -myvpid ${rmt_vpid}"
|
||||||
cmd="${cmd} -numprocs ${lam_numprocs} -jobhandle ${lam_jobhandle}"
|
cmd="${cmd} -numprocs ${lam_numprocs} -jobhandle ${lam_jobhandle}"
|
||||||
cmd="${cmd} -pwd ${lam_cwd}"
|
cmd="${cmd} -pwd ${lam_cwd}"
|
||||||
if test ! -z "${MCA_common_lam_cofs_comm_dir}" ; then
|
if test ! -z "${MCA_common_lam_cofs_comm_dir}" ; then
|
||||||
@ -193,7 +193,7 @@ launch_procs() {
|
|||||||
# Do the prep work
|
# Do the prep work
|
||||||
#
|
#
|
||||||
######################################################################
|
######################################################################
|
||||||
lam_jobhandle="pcm_cofs_job_handle_${$}_0"
|
lam_jobhandle="pcm-cofs-job-handle-${$}-0"
|
||||||
|
|
||||||
|
|
||||||
######################################################################
|
######################################################################
|
||||||
|
@ -5,15 +5,9 @@
|
|||||||
|
|
||||||
include $(top_srcdir)/config/Makefile.options
|
include $(top_srcdir)/config/Makefile.options
|
||||||
|
|
||||||
noinst_PROGRAMS = oob_cofs_test pcm_cofs_test
|
noinst_PROGRAMS = oob_cofs_test
|
||||||
|
|
||||||
oob_cofs_test_SOURCES = oob_cofs_test.c
|
oob_cofs_test_SOURCES = oob_cofs_test.c
|
||||||
oob_cofs_test_LDADD = \
|
oob_cofs_test_LDADD = \
|
||||||
../../../../src/mca/lam/oob/cofs/libmca_lam_oob_cofs.la \
|
../../../../src/mca/lam/oob/cofs/libmca_lam_oob_cofs.la \
|
||||||
../../../../src/liblam.la
|
../../../../src/liblam.la
|
||||||
|
|
||||||
pcm_cofs_test_SOURCES = pcm_cofs_test.c
|
|
||||||
pcm_cofs_test_LDADD = \
|
|
||||||
../../../../src/mca/lam/pcm/cofs/libmca_lam_pcm_cofs.la \
|
|
||||||
../../../../src/mca/lam/oob/cofs/libmca_lam_oob_cofs.la \
|
|
||||||
../../../../src/liblam.la
|
|
||||||
|
@ -1,74 +1,92 @@
|
|||||||
|
#include "lam/runtime/runtime.h"
|
||||||
#include "mca/lam/oob/oob.h"
|
#include "mca/lam/oob/oob.h"
|
||||||
#include "mca/lam/oob/cofs/src/oob_cofs.h"
|
#include "mca/lam/pcm/pcm.h"
|
||||||
|
#include "mca/lam/base/base.h"
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
int
|
int
|
||||||
main(int argc, char* argv[])
|
main(int argc, char* argv[])
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
char *tmp;
|
mca_pcm_proc_t *procs;
|
||||||
struct mca_oob_1_0_0_t *init_ret;
|
size_t nprocs;
|
||||||
int target_vpid, source_vpid, source_tag;
|
mca_pcm_proc_t *me;
|
||||||
size_t source_len;
|
int left_vpid, right_vpid, me_vpid;
|
||||||
char *source_data;
|
int count = 2;
|
||||||
char buffer[2048];
|
lam_job_handle_t job;
|
||||||
int msg_count = 0;
|
int data = 0xDEADBEEF;
|
||||||
int priority = 0;
|
int tag = MCA_OOB_ANY_TAG;
|
||||||
bool allow_threads = false;
|
bool threads, hidden;
|
||||||
bool have_hidden_threads = false;
|
|
||||||
|
|
||||||
|
printf("hello, world!\n");
|
||||||
|
ret = lam_init(argc, argv);
|
||||||
|
assert(ret == LAM_SUCCESS);
|
||||||
|
|
||||||
if (argc != 3) {
|
ret = mca_base_open();
|
||||||
printf("usage: %s my_vpid target_vpid\n", argv[0]);
|
assert(ret == LAM_SUCCESS);
|
||||||
exit(1);
|
|
||||||
|
ret = lam_rte_init(&threads, &hidden);
|
||||||
|
assert(ret == LAM_SUCCESS);
|
||||||
|
|
||||||
|
ret = mca_pcm.pcm_proc_startup();
|
||||||
|
assert(ret == LAM_SUCCESS);
|
||||||
|
|
||||||
|
ret = mca_pcm.pcm_proc_get_peers(&procs, &nprocs);
|
||||||
|
assert(ret == LAM_SUCCESS);
|
||||||
|
|
||||||
|
job = mca_pcm.pcm_handle_get();
|
||||||
|
assert(job != NULL);
|
||||||
|
|
||||||
|
me = mca_pcm.pcm_proc_get_me();
|
||||||
|
assert(me != NULL);
|
||||||
|
|
||||||
|
/* time to play the ring game! */
|
||||||
|
me_vpid = me->vpid;
|
||||||
|
printf("Hello, World. I am vpid %d\n", me_vpid);
|
||||||
|
|
||||||
|
left_vpid = me_vpid == 0 ? nprocs - 1 : me_vpid - 1;
|
||||||
|
right_vpid = (me_vpid + 1) % nprocs;
|
||||||
|
|
||||||
|
if (me_vpid == 0) {
|
||||||
|
printf("vpid %d sending to vpid %d\n", me_vpid, right_vpid);
|
||||||
|
ret = mca_oob.oob_send(job, right_vpid, 0, &data, sizeof(int));
|
||||||
|
assert(ret == LAM_SUCCESS);
|
||||||
|
count--;
|
||||||
}
|
}
|
||||||
|
|
||||||
tmp = malloc(strlen("MCA_OOB_BASE_VPID") + strlen(argv[1]) + 2);
|
while (count > 0) {
|
||||||
sprintf(tmp, "MCA_OOB_BASE_VPID=%s", argv[1]);
|
int *data_ptr;
|
||||||
putenv(tmp);
|
size_t data_ptr_len;
|
||||||
|
printf("vpid %d recving from vpid %d\n", me_vpid, left_vpid);
|
||||||
|
ret = mca_oob.oob_recv(job, left_vpid, &tag, &data_ptr, &data_ptr_len);
|
||||||
|
assert(ret == LAM_SUCCESS);
|
||||||
|
assert(data_ptr_len == sizeof(int));
|
||||||
|
assert(*data_ptr == data);
|
||||||
|
|
||||||
target_vpid = atoi(argv[2]);
|
printf("vpid %d sending to vpid %d\n", me_vpid, right_vpid);
|
||||||
|
ret = mca_oob.oob_send(job, right_vpid, 0, &data, sizeof(int));
|
||||||
|
assert(ret == LAM_SUCCESS);
|
||||||
|
|
||||||
ret = mca_oob_cofs_open();
|
count--;
|
||||||
if (ret != LAM_SUCCESS) {
|
|
||||||
printf("mca_oob_cofs_open returned %d\n", ret);
|
|
||||||
exit(1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
init_ret = mca_oob_cofs_init(&priority, &allow_threads, &have_hidden_threads);
|
|
||||||
if (init_ret == NULL) {
|
if (me_vpid == 0) {
|
||||||
printf("mca_oob_cofs_init returned NULL\n");
|
int *data_ptr;
|
||||||
exit(1);
|
size_t data_ptr_len;
|
||||||
} else {
|
printf("vpid %d recving from vpid %d\n", me_vpid, left_vpid);
|
||||||
printf("mca_oob_cofs_query said \"go\" with priority %d\n", priority);
|
ret = mca_oob.oob_recv(job, left_vpid, &tag, &data_ptr, &data_ptr_len);
|
||||||
|
assert(ret == LAM_SUCCESS);
|
||||||
|
assert(data_ptr_len == sizeof(int));
|
||||||
|
assert(*data_ptr == data);
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("#\n# Sending Messages\n#\n\n");
|
ret = lam_rte_finalize();
|
||||||
for (msg_count = 0 ; msg_count < 20 ; ++msg_count) {
|
assert(ret == LAM_SUCCESS);
|
||||||
sprintf(buffer, "%s's message number %d\n", argv[1], msg_count);
|
|
||||||
printf("%d %d: %s\n", target_vpid, 1, buffer);
|
|
||||||
ret = mca_oob_cofs_send("foobar", target_vpid, 1, buffer, strlen(buffer) + 1);
|
|
||||||
if (ret != LAM_SUCCESS) {
|
|
||||||
printf("mca_oob_cofs_send failed on msg_count %d\n", msg_count);
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("#\n# Receiving Messages\n#\n\n");
|
|
||||||
for (msg_count = 0 ; msg_count < 20 ; ++msg_count) {
|
|
||||||
source_tag = 1;
|
|
||||||
ret = mca_oob_cofs_recv("foobar", target_vpid, &source_tag, (void**) &source_data, &source_len);
|
|
||||||
if (ret != LAM_SUCCESS) {
|
|
||||||
printf("mca_oob_cofs_recv failed on msg_count %d, %d\n", msg_count, ret);
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
printf("%d %d: %s\n", source_vpid, source_tag, source_data);
|
|
||||||
free(source_data);
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("#\n# Finished\n#\n\n");
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -1,66 +0,0 @@
|
|||||||
#include "lam_config.h"
|
|
||||||
|
|
||||||
#include "lam/constants.h"
|
|
||||||
#include "mca/lam/oob/oob.h"
|
|
||||||
#include "mca/lam/oob/cofs/src/oob_cofs.h"
|
|
||||||
#include "mca/lam/pcm/pcm.h"
|
|
||||||
#include "mca/lam/pcm/cofs/src/pcm_cofs.h"
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
|
|
||||||
int
|
|
||||||
main(int argc, char *argv[])
|
|
||||||
{
|
|
||||||
int ret;
|
|
||||||
struct mca_pcm_1_0_0_t* modret;
|
|
||||||
mca_pcm_proc_t *procs;
|
|
||||||
size_t nprocs;
|
|
||||||
mca_pcm_proc_t *my_proc;
|
|
||||||
int priority = 0;
|
|
||||||
bool allow_threads = false;
|
|
||||||
bool have_hidden_threads = false;
|
|
||||||
|
|
||||||
modret = mca_pcm_cofs_init(&priority, &allow_threads, &have_hidden_threads);
|
|
||||||
if (modret == NULL) {
|
|
||||||
printf("failed to init PCM module. Aborting.\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Do all the stuff that a pcm module user would do
|
|
||||||
*/
|
|
||||||
ret = mca_pcm_cofs_proc_startup();
|
|
||||||
if (ret != MPI_SUCCESS) {
|
|
||||||
printf("Failed in cofs_startup() with retcode: %d\n", ret);
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = mca_pcm_cofs_proc_get_peers(&procs, &nprocs);
|
|
||||||
if (ret != MPI_SUCCESS) {
|
|
||||||
printf("Failed in cofs_proc_get_peers() with retcode %d\n", ret);
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
my_proc = mca_pcm_cofs_proc_get_me();
|
|
||||||
if (my_proc == NULL) {
|
|
||||||
printf("Failed to get my proc entry\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("Hello, World, I am vpid %d of job %s\n", my_proc->vpid, my_proc->job_handle);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* from here, we can bring up the OOB interface (if it isn't already) and be
|
|
||||||
* ready to run...
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Clean up after ourselves - normall that mca interface would do that for us...
|
|
||||||
*/
|
|
||||||
mca_pcm_cofs_close();
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
Загрузка…
x
Ссылка в новой задаче
Block a user