1
1

Merge pull request #4498 from rhc54/topic/pmixup

Some minor cleanups of the DVM
Этот коммит содержится в:
Ralph Castain 2017-11-12 19:01:15 -08:00 коммит произвёл GitHub
родитель 6d6f0beb62 4381b2c60f
Коммит 6eb3c124e1
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
6 изменённых файлов: 38 добавлений и 8 удалений

Просмотреть файл

@@ -22,6 +22,7 @@ my $rawoutput = 0;
my $myresults = "myresults"; my $myresults = "myresults";
my $ppn = 1; my $ppn = 1;
my @csvrow; my @csvrow;
my $multiplier = 1;
my @tests = qw(/bin/true ./orte_no_op ./mpi_no_op ./mpi_no_op ./mpi_no_op); my @tests = qw(/bin/true ./orte_no_op ./mpi_no_op ./mpi_no_op ./mpi_no_op);
my @options = ("", "", "", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1 -mca pmix_base_collect_data 0", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1 -mca async_mpi_init 1 -mca async_mpi_finalize 1 -mca pmix_base_collect_data 0"); my @options = ("", "", "", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1 -mca pmix_base_collect_data 0", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1 -mca async_mpi_init 1 -mca async_mpi_finalize 1 -mca pmix_base_collect_data 0");
@@ -52,6 +53,7 @@ GetOptions(
"results=s" => \$myresults, "results=s" => \$myresults,
"rawout" => \$rawoutput, "rawout" => \$rawoutput,
"ppn=s" => \$ppn, "ppn=s" => \$ppn,
"multiplier=s" => \$multiplier,
) or die "unable to parse options, stopped"; ) or die "unable to parse options, stopped";
if ($HELP) { if ($HELP) {
@@ -69,6 +71,7 @@ if ($HELP) {
--results=file File where results are to be stored in comma-separated value format --results=file File where results are to be stored in comma-separated value format
--rawout Provide raw timing output to the file --rawout Provide raw timing output to the file
--ppn=n Run n procs/node --ppn=n Run n procs/node
--multiplier=n Run n daemons/node (only for DVM and mpirun)
"; ";
exit(0); exit(0);
} }
@@ -124,6 +127,9 @@ foreach $starter (@starterlist) {
} elsif ($usempirun && $starter eq "mpirun") { } elsif ($usempirun && $starter eq "mpirun") {
push @starters, $starter; push @starters, $starter;
$opt = $starteroptionlist[$idx] . " --npernode " . $ppn; $opt = $starteroptionlist[$idx] . " --npernode " . $ppn;
if ($multiplier gt 1) {
$opt = $opt . " --mca rtc ^hwloc --mca ras_base_multiplier " . $multiplier;
}
push @starteroptions, $opt; push @starteroptions, $opt;
} elsif ($useaprun && $starter eq "aprun") { } elsif ($useaprun && $starter eq "aprun") {
push @starters, $starter; push @starters, $starter;
@@ -267,14 +273,18 @@ foreach $starter (@starters) {
print "STARTER: $starter\n"; print "STARTER: $starter\n";
# if we are going to use the dvm, then we # if we are going to use the dvm, then we
if ($starter eq "prun") { if ($starter eq "prun") {
my $dvm = "orte-dvm --system-server";
if ($multiplier gt 1) {
$dvm = $dvm . " --mca rtc ^hwloc --mca ras_base_multiplier " . $multiplier;
}
# need to start it # need to start it
if ($myresults) { if ($myresults) {
print FILE "\n\norte-dvm --system-server\n"; print FILE "\n\n$dvm\n";
} }
if (!$SHOWME) { if (!$SHOWME) {
unless ($pid = fork) { unless ($pid = fork) {
unless (fork) { unless (fork) {
exec "orte-dvm --system-server 2>&1"; exec "$dvm 2>&1";
die "no exec"; die "no exec";
} }
exit 0; exit 0;

Просмотреть файл

@@ -528,10 +528,16 @@ static pmix_status_t parse_uri_file(char *filename,
pmix_client_globals.myserver->proc_type = PMIX_PROC_SERVER | PMIX_PROC_V20; pmix_client_globals.myserver->proc_type = PMIX_PROC_SERVER | PMIX_PROC_V20;
pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
"V20 SERVER DETECTED"); "V20 SERVER DETECTED");
} else if (0 == strncmp(p2, "v2.1", strlen("v2.1"))) { } else if (0 == strncmp(p2, "v2.1", strlen("v2.1")) ||
0 == strncmp(p2, "2.1", strlen("2.1"))) {
pmix_client_globals.myserver->proc_type = PMIX_PROC_SERVER | PMIX_PROC_V21; pmix_client_globals.myserver->proc_type = PMIX_PROC_SERVER | PMIX_PROC_V21;
pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
"V21 SERVER DETECTED"); "V21 SERVER DETECTED");
} else if (0 == strncmp(p2, "3", strlen("3")) ||
0 == strncmp(p2, "v3", strlen("v3"))) {
pmix_client_globals.myserver->proc_type = PMIX_PROC_SERVER | PMIX_PROC_V3;
pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
"V3 SERVER DETECTED");
} else { } else {
pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
"UNKNOWN SERVER VERSION DETECTED: %s", p2); "UNKNOWN SERVER VERSION DETECTED: %s", p2);

Просмотреть файл

@@ -625,6 +625,9 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo,
CLOSE_THE_SOCKET(lt->socket); CLOSE_THE_SOCKET(lt->socket);
goto sockerror; goto sockerror;
} }
pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
"WRITING SYSTEM FILE %s",
mca_ptl_tcp_component.system_filename);
fp = fopen(mca_ptl_tcp_component.system_filename, "w"); fp = fopen(mca_ptl_tcp_component.system_filename, "w");
if (NULL == fp) { if (NULL == fp) {
pmix_output(0, "Impossible to open the file %s in write mode\n", mca_ptl_tcp_component.system_filename); pmix_output(0, "Impossible to open the file %s in write mode\n", mca_ptl_tcp_component.system_filename);
@@ -637,7 +640,7 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo,
/* output my nspace and rank plus the URI */ /* output my nspace and rank plus the URI */
fprintf(fp, "%s\n", lt->uri); fprintf(fp, "%s\n", lt->uri);
/* add a flag that indicates we accept v2.1 protocols */ /* add a flag that indicates we accept v3.0 protocols */
fprintf(fp, "v%s\n", PMIX_VERSION); fprintf(fp, "v%s\n", PMIX_VERSION);
fclose(fp); fclose(fp);
/* set the file mode */ /* set the file mode */
@@ -659,6 +662,9 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo,
CLOSE_THE_SOCKET(lt->socket); CLOSE_THE_SOCKET(lt->socket);
goto sockerror; goto sockerror;
} }
pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
"WRITING TOOL FILE %s",
mca_ptl_tcp_component.session_filename);
fp = fopen(mca_ptl_tcp_component.session_filename, "w"); fp = fopen(mca_ptl_tcp_component.session_filename, "w");
if (NULL == fp) { if (NULL == fp) {
pmix_output(0, "Impossible to open the file %s in write mode\n", mca_ptl_tcp_component.session_filename); pmix_output(0, "Impossible to open the file %s in write mode\n", mca_ptl_tcp_component.session_filename);

Просмотреть файл

@@ -264,12 +264,10 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf)
} }
} }
/* see if anyone local is waiting on this data- could be more than one */ /* see if anyone local is waiting on this data- could be more than one */
pmix_output(0, "CHECKING PENDING");
rc = pmix_pending_resolve(nptr, info->pname.rank, PMIX_SUCCESS, NULL); rc = pmix_pending_resolve(nptr, info->pname.rank, PMIX_SUCCESS, NULL);
if (PMIX_SUCCESS != rc) { if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc); PMIX_ERROR_LOG(rc);
} }
pmix_output(0, "RETURNING %d", rc);
return rc; return rc;
} }

Просмотреть файл

@@ -286,7 +286,8 @@ int pmix_server_init(void)
* PMIx connection point - only do this for the HNP as, in * PMIx connection point - only do this for the HNP as, in
* at least one case, a daemon can be colocated with the * at least one case, a daemon can be colocated with the
* HNP and would overwrite the server rendezvous file */ * HNP and would overwrite the server rendezvous file */
if (orte_pmix_server_globals.system_server && ORTE_PROC_IS_HNP) { if (orte_pmix_server_globals.system_server &&
(ORTE_PROC_IS_HNP || ORTE_PROC_IS_MASTER)) {
kv = OBJ_NEW(opal_value_t); kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_SERVER_SYSTEM_SUPPORT); kv->key = strdup(OPAL_PMIX_SERVER_SYSTEM_SUPPORT);
kv->type = OPAL_BOOL; kv->type = OPAL_BOOL;

Просмотреть файл

@@ -742,9 +742,18 @@ static void _toolconn(int sd, short args, void *cbdata)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
/* if we are the HNP, we can directly assign the jobid */ /* if we are the HNP, we can directly assign the jobid */
if (ORTE_PROC_IS_HNP) { if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_MASTER) {
jdata = OBJ_NEW(orte_job_t); jdata = OBJ_NEW(orte_job_t);
rc = orte_plm_base_create_jobid(jdata); rc = orte_plm_base_create_jobid(jdata);
if (ORTE_SUCCESS != rc) {
tool.jobid = ORTE_JOBID_INVALID;
tool.vpid = 0;
if (NULL != cd->toolcbfunc) {
cd->toolcbfunc(rc, tool, cd->cbdata);
}
OBJ_RELEASE(cd);
return;
}
opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata); opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata);
/* setup some required job-level fields in case this /* setup some required job-level fields in case this
* tool calls spawn, or uses some other functions that * tool calls spawn, or uses some other functions that