Summary of this patch (text before the first "diff --git" line is
ignored by patch tools):

  * contrib/scaling/scaling.pl: add a --multiplier=n option.  When n is
    greater than 1, "--mca rtc ^hwloc --mca ras_base_multiplier n" is
    appended to the mpirun options and to the orte-dvm command line.
  * ptl_tcp.c: parse_uri_file also accepts "2.1" as a v2.1 server and
    recognizes "3" / "v3" as a v3 server.
  * ptl_tcp_component.c: verbose output before the system and tool
    rendezvous files are written; comment updated to v3.0.
  * pmix_server_ops.c: drop two unconditional debug pmix_output calls
    from pmix_server_commit.
  * pmix_server.c / pmix_server_gen.c: treat ORTE_PROC_IS_MASTER like
    ORTE_PROC_IS_HNP for system-server support and tool jobid
    assignment, and bail out if the jobid cannot be created.

Review changes relative to the submitted patch:

  * scaling.pl compared $multiplier with the string operator "gt";
    it now uses numeric ">" so values such as "02" or "+2" behave
    as numbers.
  * _toolconn released cd but not the freshly created jdata when
    orte_plm_base_create_jobid failed; jdata is now released too.
  * Hunk headers were adjusted for the added lines.  Indentation of
    context lines was reconstructed and may need
    "git apply --ignore-whitespace".

diff --git a/contrib/scaling/scaling.pl b/contrib/scaling/scaling.pl
index b4ef11a59a..f5d5da3765 100755
--- a/contrib/scaling/scaling.pl
+++ b/contrib/scaling/scaling.pl
@@ -22,6 +22,7 @@ my $rawoutput = 0;
 my $myresults = "myresults";
 my $ppn = 1;
 my @csvrow;
+my $multiplier = 1;
 
 my @tests = qw(/bin/true ./orte_no_op ./mpi_no_op ./mpi_no_op ./mpi_no_op);
 my @options = ("", "", "", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1 -mca pmix_base_collect_data 0", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1 -mca async_mpi_init 1 -mca async_mpi_finalize 1 -mca pmix_base_collect_data 0");
@@ -52,6 +53,7 @@ GetOptions(
     "results=s" => \$myresults,
     "rawout" => \$rawoutput,
     "ppn=s" => \$ppn,
+    "multiplier=s" => \$multiplier,
 ) or die "unable to parse options, stopped";
 
 if ($HELP) {
@@ -69,6 +71,7 @@ if ($HELP) {
 --results=file File where results are to be stored in comma-separated value format
 --rawout Provide raw timing output to the file
 --ppn=n Run n procs/node
+--multiplier=n Run n daemons/node (only for DVM and mpirun)
 ";
     exit(0);
 }
@@ -124,6 +127,10 @@ foreach $starter (@starterlist) {
     } elsif ($usempirun && $starter eq "mpirun") {
         push @starters, $starter;
         $opt = $starteroptionlist[$idx] . " --npernode " . $ppn;
+        # numeric test: "gt" would compare the option value as a string
+        if ($multiplier > 1) {
+            $opt = $opt . " --mca rtc ^hwloc --mca ras_base_multiplier " . $multiplier;
+        }
         push @starteroptions, $opt;
     } elsif ($useaprun && $starter eq "aprun") {
         push @starters, $starter;
@@ -267,14 +274,18 @@ foreach $starter (@starters) {
     print "STARTER: $starter\n";
     # if we are going to use the dvm, then we
     if ($starter eq "prun") {
+        my $dvm = "orte-dvm --system-server";
+        if ($multiplier > 1) {
+            $dvm = $dvm . " --mca rtc ^hwloc --mca ras_base_multiplier " . $multiplier;
+        }
         # need to start it
         if ($myresults) {
-            print FILE "\n\norte-dvm --system-server\n";
+            print FILE "\n\n$dvm\n";
         }
         if (!$SHOWME) {
             unless ($pid = fork) {
                 unless (fork) {
-                    exec "orte-dvm --system-server 2>&1";
+                    exec "$dvm 2>&1";
                     die "no exec";
                 }
                 exit 0;
diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.c b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.c
index a900230c3b..1c46e2923e 100644
--- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.c
+++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.c
@@ -528,10 +528,16 @@ static pmix_status_t parse_uri_file(char *filename,
         pmix_client_globals.myserver->proc_type = PMIX_PROC_SERVER | PMIX_PROC_V20;
         pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                             "V20 SERVER DETECTED");
-    } else if (0 == strncmp(p2, "v2.1", strlen("v2.1"))) {
+    } else if (0 == strncmp(p2, "v2.1", strlen("v2.1")) ||
+               0 == strncmp(p2, "2.1", strlen("2.1"))) {
         pmix_client_globals.myserver->proc_type = PMIX_PROC_SERVER | PMIX_PROC_V21;
         pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                             "V21 SERVER DETECTED");
+    } else if (0 == strncmp(p2, "3", strlen("3")) ||
+               0 == strncmp(p2, "v3", strlen("v3"))) {
+        pmix_client_globals.myserver->proc_type = PMIX_PROC_SERVER | PMIX_PROC_V3;
+        pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
+                            "V3 SERVER DETECTED");
     } else {
         pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
                             "UNKNOWN SERVER VERSION DETECTED: %s", p2);
diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c
index 1f1d3e89b2..69ae60e55d 100644
--- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c
+++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c
@@ -625,6 +625,9 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo,
             CLOSE_THE_SOCKET(lt->socket);
             goto sockerror;
         }
+        pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
+                            "WRITING SYSTEM FILE %s",
+                            mca_ptl_tcp_component.system_filename);
         fp = fopen(mca_ptl_tcp_component.system_filename, "w");
         if (NULL == fp) {
             pmix_output(0, "Impossible to open the file %s in write mode\n", mca_ptl_tcp_component.system_filename);
@@ -637,7 +640,7 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo,
 
         /* output my nspace and rank plus the URI */
         fprintf(fp, "%s\n", lt->uri);
-        /* add a flag that indicates we accept v2.1 protocols */
+        /* add a flag that indicates we accept v3.0 protocols */
         fprintf(fp, "v%s\n", PMIX_VERSION);
         fclose(fp);
         /* set the file mode */
@@ -659,6 +662,9 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo,
             CLOSE_THE_SOCKET(lt->socket);
             goto sockerror;
         }
+        pmix_output_verbose(2, pmix_ptl_base_framework.framework_output,
+                            "WRITING TOOL FILE %s",
+                            mca_ptl_tcp_component.session_filename);
         fp = fopen(mca_ptl_tcp_component.session_filename, "w");
         if (NULL == fp) {
             pmix_output(0, "Impossible to open the file %s in write mode\n", mca_ptl_tcp_component.session_filename);
diff --git a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c
index 36a81f8fe6..9476be2ffa 100644
--- a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c
+++ b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c
@@ -264,12 +264,10 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf)
         }
     }
     /* see if anyone local is waiting on this data- could be more than one */
-    pmix_output(0, "CHECKING PENDING");
     rc = pmix_pending_resolve(nptr, info->pname.rank, PMIX_SUCCESS, NULL);
     if (PMIX_SUCCESS != rc) {
         PMIX_ERROR_LOG(rc);
     }
-    pmix_output(0, "RETURNING %d", rc);
     return rc;
 }
 
diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c
index 5f73919add..b29e3170ae 100644
--- a/orte/orted/pmix/pmix_server.c
+++ b/orte/orted/pmix/pmix_server.c
@@ -286,7 +286,8 @@ int pmix_server_init(void)
      * PMIx connection point - only do this for the HNP as, in
      * at least one case, a daemon can be colocated with the
      * HNP and would overwrite the server rendezvous file */
-    if (orte_pmix_server_globals.system_server && ORTE_PROC_IS_HNP) {
+    if (orte_pmix_server_globals.system_server &&
+        (ORTE_PROC_IS_HNP || ORTE_PROC_IS_MASTER)) {
         kv = OBJ_NEW(opal_value_t);
         kv->key = strdup(OPAL_PMIX_SERVER_SYSTEM_SUPPORT);
         kv->type = OPAL_BOOL;
diff --git a/orte/orted/pmix/pmix_server_gen.c b/orte/orted/pmix/pmix_server_gen.c
index 39850edf17..13f0a6c2d5 100644
--- a/orte/orted/pmix/pmix_server_gen.c
+++ b/orte/orted/pmix/pmix_server_gen.c
@@ -742,9 +742,20 @@ static void _toolconn(int sd, short args, void *cbdata)
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
 
     /* if we are the HNP, we can directly assign the jobid */
-    if (ORTE_PROC_IS_HNP) {
+    if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_MASTER) {
         jdata = OBJ_NEW(orte_job_t);
         rc = orte_plm_base_create_jobid(jdata);
+        if (ORTE_SUCCESS != rc) {
+            /* jdata has not yet been added to orte_job_data here, so drop it */
+            OBJ_RELEASE(jdata);
+            tool.jobid = ORTE_JOBID_INVALID;
+            tool.vpid = 0;
+            if (NULL != cd->toolcbfunc) {
+                cd->toolcbfunc(rc, tool, cd->cbdata);
+            }
+            OBJ_RELEASE(cd);
+            return;
+        }
         opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata);
         /* setup some required job-level fields in case this
          * tool calls spawn, or uses some other functions that