Allow singletons to use ompi-server for rendezvous via pubsub as well as comm_spawn without starting their own local daemons
This commit was SVN r23384.
Этот коммит содержится в:
родитель
eee7541ae7
Коммит
570d19106b
@ -58,6 +58,7 @@
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/orted/orted.h"
|
||||
|
||||
#include "orte/runtime/runtime.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
@ -186,11 +187,11 @@ int main(int argc, char *argv[])
|
||||
tmp_env_var = NULL; /* Silence compiler warning */
|
||||
#endif
|
||||
|
||||
/* Perform the standard init, but flag that we are a tool
|
||||
* so that we only open up the communications infrastructure. No
|
||||
* session directories will be created.
|
||||
*/
|
||||
if (ORTE_SUCCESS != (ret = orte_init(&argc, &argv, ORTE_PROC_TOOL))) {
|
||||
/* don't want session directories */
|
||||
orte_create_session_dirs = false;
|
||||
|
||||
/* Perform the standard init, but flag that we are an HNP */
|
||||
if (ORTE_SUCCESS != (ret = orte_init(&argc, &argv, ORTE_PROC_HNP))) {
|
||||
fprintf(stderr, "ompi-server: failed to initialize -- aborting\n");
|
||||
exit(1);
|
||||
}
|
||||
@ -228,6 +229,15 @@ int main(int argc, char *argv[])
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* setup to listen for commands sent specifically to me */
|
||||
ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON,
|
||||
ORTE_RML_NON_PERSISTENT, orte_daemon_recv, NULL);
|
||||
if (ret != ORTE_SUCCESS && ret != ORTE_ERR_NOT_IMPLEMENTED) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
orte_finalize();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* Set signal handlers to catch kill signals so we can properly clean up
|
||||
* after ourselves.
|
||||
*/
|
||||
|
@ -32,6 +32,7 @@
|
||||
#include <signal.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "opal/hash_string.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/path.h"
|
||||
#include "opal/util/opal_sos.h"
|
||||
@ -94,51 +95,137 @@ orte_ess_base_module_t orte_ess_singleton_module = {
|
||||
static int rte_init(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
char *server_uri, *param;
|
||||
uint16_t jobfam;
|
||||
uint32_t hash32;
|
||||
uint32_t bias;
|
||||
|
||||
/* run the prolog */
|
||||
if (ORTE_SUCCESS != (rc = orte_ess_base_std_prolog())) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we are the selected module, then we must be a singleton
|
||||
* as it means that no other method for discovering a name
|
||||
* could be found. In this case, we need to start a daemon that
|
||||
* can support our operation. We must do this for two reasons:
|
||||
*
|
||||
* (1) if we try to play the role of the HNP, then any child processes
|
||||
* we might start via comm_spawn will rely on us for all ORTE-level
|
||||
* support. However, we can only progress those requests when the
|
||||
* the application calls into the OMPI/ORTE library! Thus, if this
|
||||
* singleton just does computation, the other processes will "hang"
|
||||
* in any calls into the ORTE layer that communicate with the HNP -
|
||||
* and most calls on application procs *do*.
|
||||
*
|
||||
* (2) daemons are used to communicate messages for administrative
|
||||
* purposes in a broadcast-like manner. Thus, daemons are expected
|
||||
* to be able to interpret specific commands. Our application process
|
||||
* doesn't have any idea how to handle those commands, thus causing
|
||||
* the entire ORTE administrative system to break down.
|
||||
*
|
||||
* For those reasons, we choose to fork/exec a daemon at this time
|
||||
* and then reconnect ourselves to it. We could just "fork" and declare
|
||||
* the child to be a daemon, but that would require we place *all* of the
|
||||
* daemon command processing code in the ORTE library, do some strange
|
||||
* mojo in a few places, etc. This doesn't seem worth it, so we'll just
|
||||
* do the old fork/exec here
|
||||
*
|
||||
* Note that Windows-based systems have to do their own special trick as
|
||||
* they don't support fork/exec. So we have to use a giant "if" here to
|
||||
* protect the Windows world. To make the results more readable, we put
|
||||
* the whole mess in a separate function below
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc= fork_hnp())) {
|
||||
/* if this didn't work, then we cannot support operation any further.
|
||||
* Abort the system and tell orte_init to exit
|
||||
*/
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
/* look for the ompi-server MCA param */
|
||||
mca_base_param_reg_string_name("orte", "server",
|
||||
"Server to be used as HNP - [file|FILE]:<filename> or just uri",
|
||||
false, false, NULL, &server_uri);
|
||||
|
||||
if (NULL != server_uri) {
|
||||
/* we are going to connect to a server HNP */
|
||||
if (0 == strncmp(server_uri, "file", strlen("file")) ||
|
||||
0 == strncmp(server_uri, "FILE", strlen("FILE"))) {
|
||||
char input[1024], *filename;
|
||||
FILE *fp;
|
||||
|
||||
/* it is a file - get the filename */
|
||||
filename = strchr(server_uri, ':');
|
||||
if (NULL == filename) {
|
||||
/* filename is not correctly formatted */
|
||||
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-bad", true,
|
||||
"singleton", server_uri);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
++filename; /* space past the : */
|
||||
|
||||
if (0 >= strlen(filename)) {
|
||||
/* they forgot to give us the name! */
|
||||
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-missing", true,
|
||||
"singleton", server_uri);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
/* open the file and extract the uri */
|
||||
fp = fopen(filename, "r");
|
||||
if (NULL == fp) { /* can't find or read file! */
|
||||
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-access", true,
|
||||
"singleton", server_uri);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
if (NULL == fgets(input, 1024, fp)) {
|
||||
/* something malformed about file */
|
||||
fclose(fp);
|
||||
orte_show_help("help-orterun.txt", "orterun:ompi-server-file-bad", true,
|
||||
"singleton", server_uri, "singleton");
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
fclose(fp);
|
||||
input[strlen(input)-1] = '\0'; /* remove newline */
|
||||
orte_process_info.my_hnp_uri = strdup(input);
|
||||
} else {
|
||||
orte_process_info.my_hnp_uri = strdup(server_uri);
|
||||
}
|
||||
/* save the daemon uri - we will process it later */
|
||||
orte_process_info.my_daemon_uri = strdup(orte_process_info.my_hnp_uri);
|
||||
/* indicate we are a singleton so orte_init knows what to do */
|
||||
orte_process_info.proc_type |= ORTE_PROC_SINGLETON;
|
||||
/* for convenience, push the pubsub version of this param into the environ */
|
||||
asprintf(¶m,"OMPI_MCA_pubsub_orte_server=%s",orte_process_info.my_hnp_uri);
|
||||
putenv(param);
|
||||
/* now define my own name */
|
||||
/* hash the nodename */
|
||||
OPAL_HASH_STR(orte_process_info.nodename, hash32);
|
||||
|
||||
bias = (uint32_t)orte_process_info.pid;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output,
|
||||
"ess:singleton: initial bias %ld nodename hash %lu",
|
||||
(long)bias, (unsigned long)hash32));
|
||||
|
||||
/* fold in the bias */
|
||||
hash32 = hash32 ^ bias;
|
||||
|
||||
/* now compress to 16-bits */
|
||||
jobfam = (uint16_t)(((0x0000ffff & (0xffff0000 & hash32) >> 16)) ^ (0x0000ffff & hash32));
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output,
|
||||
"ess:singleton:: final jobfam %lu",
|
||||
(unsigned long)jobfam));
|
||||
|
||||
/* set the name */
|
||||
ORTE_PROC_MY_NAME->jobid = 0xffff0000 & ((uint32_t)jobfam << 16);
|
||||
ORTE_PROC_MY_NAME->vpid = 0;
|
||||
|
||||
} else {
|
||||
/*
|
||||
* If we are the selected module, then we must be a singleton
|
||||
* as it means that no other method for discovering a name
|
||||
* could be found. In this case, we need to start a daemon that
|
||||
* can support our operation. We must do this for two reasons:
|
||||
*
|
||||
* (1) if we try to play the role of the HNP, then any child processes
|
||||
* we might start via comm_spawn will rely on us for all ORTE-level
|
||||
* support. However, we can only progress those requests when the
|
||||
* the application calls into the OMPI/ORTE library! Thus, if this
|
||||
* singleton just does computation, the other processes will "hang"
|
||||
* in any calls into the ORTE layer that communicate with the HNP -
|
||||
* and most calls on application procs *do*.
|
||||
*
|
||||
* (2) daemons are used to communicate messages for administrative
|
||||
* purposes in a broadcast-like manner. Thus, daemons are expected
|
||||
* to be able to interpret specific commands. Our application process
|
||||
* doesn't have any idea how to handle those commands, thus causing
|
||||
* the entire ORTE administrative system to break down.
|
||||
*
|
||||
* For those reasons, we choose to fork/exec a daemon at this time
|
||||
* and then reconnect ourselves to it. We could just "fork" and declare
|
||||
* the child to be a daemon, but that would require we place *all* of the
|
||||
* daemon command processing code in the ORTE library, do some strange
|
||||
* mojo in a few places, etc. This doesn't seem worth it, so we'll just
|
||||
* do the old fork/exec here
|
||||
*
|
||||
* Note that Windows-based systems have to do their own special trick as
|
||||
* they don't support fork/exec. So we have to use a giant "if" here to
|
||||
* protect the Windows world. To make the results more readable, we put
|
||||
* the whole mess in a separate function below
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc= fork_hnp())) {
|
||||
/* if this didn't work, then we cannot support operation any further.
|
||||
* Abort the system and tell orte_init to exit
|
||||
*/
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
orte_process_info.num_procs = 1;
|
||||
@ -199,14 +286,14 @@ static int fork_hnp(void)
|
||||
int rc;
|
||||
|
||||
/* A pipe is used to communicate between the parent and child to
|
||||
indicate whether the exec ultimately succeeded or failed. The
|
||||
child sets the pipe to be close-on-exec; the child only ever
|
||||
writes anything to the pipe if there is an error (e.g.,
|
||||
executable not found, exec() fails, etc.). The parent does a
|
||||
blocking read on the pipe; if the pipe closed with no data,
|
||||
then the exec() succeeded. If the parent reads something from
|
||||
the pipe, then the child was letting us know that it failed.
|
||||
*/
|
||||
indicate whether the exec ultimately succeeded or failed. The
|
||||
child sets the pipe to be close-on-exec; the child only ever
|
||||
writes anything to the pipe if there is an error (e.g.,
|
||||
executable not found, exec() fails, etc.). The parent does a
|
||||
blocking read on the pipe; if the pipe closed with no data,
|
||||
then the exec() succeeded. If the parent reads something from
|
||||
the pipe, then the child was letting us know that it failed.
|
||||
*/
|
||||
if (pipe(p) < 0) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES);
|
||||
return ORTE_ERR_SYS_LIMITS_PIPES;
|
||||
@ -318,7 +405,7 @@ static int fork_hnp(void)
|
||||
exit(1);
|
||||
|
||||
} else {
|
||||
/* I am the parent - wait to hear something back and
|
||||
/* I am the parent - wait to hear something back and
|
||||
* report results
|
||||
*/
|
||||
close(p[1]); /* parent closes the write - orted will write its contact info to it*/
|
||||
@ -370,7 +457,7 @@ static int fork_hnp(void)
|
||||
/* likewise, since this is also the HNP, set that uri too */
|
||||
orte_process_info.my_hnp_uri = strdup(orted_uri);
|
||||
|
||||
/* indicate we are a singleton so orte_init knows what to do */
|
||||
/* indicate we are a singleton so orte_init knows what to do */
|
||||
orte_process_info.proc_type |= ORTE_PROC_SINGLETON;
|
||||
/* all done - report success */
|
||||
free(orted_uri);
|
||||
@ -499,7 +586,7 @@ static int fork_hnp(void)
|
||||
/* likewise, since this is also the HNP, set that uri too */
|
||||
orte_process_info.my_hnp_uri = strdup(orted_uri);
|
||||
|
||||
/* indicate we are a singleton so orte_init knows what to do */
|
||||
/* indicate we are a singleton so orte_init knows what to do */
|
||||
orte_process_info.proc_type |= ORTE_PROC_SINGLETON;
|
||||
/* all done - report success */
|
||||
free(orted_uri);
|
||||
|
@ -2178,6 +2178,10 @@ int orte_odls_base_default_require_sync(orte_process_name_t *proc,
|
||||
* come from a singleton
|
||||
*/
|
||||
if (!found) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls: registering sync on singleton %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
child = OBJ_NEW(orte_odls_child_t);
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.copy((void**)&child->name, proc, ORTE_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
|
@ -51,6 +51,11 @@ int orte_routed_base_register_sync(bool setup)
|
||||
orte_daemon_cmd_flag_t command=ORTE_DAEMON_SYNC_BY_PROC;
|
||||
char *rml_uri;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output,
|
||||
"%s registering sync to daemon %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_DAEMON)));
|
||||
|
||||
/* we need to get the oob to establish
|
||||
* the connection - the oob will leave the connection "alive"
|
||||
* thereafter so we can communicate readily
|
||||
@ -95,6 +100,9 @@ int orte_routed_base_register_sync(bool setup)
|
||||
* gets serviced by the event library on the orted prior to the
|
||||
* process exiting
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output,
|
||||
"%s registering sync waiting for ack",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
sync_recvd = false;
|
||||
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_SYNC,
|
||||
ORTE_RML_NON_PERSISTENT, report_sync, NULL);
|
||||
@ -105,6 +113,10 @@ int orte_routed_base_register_sync(bool setup)
|
||||
|
||||
ORTE_PROGRESSED_WAIT(sync_recvd, 0, 1);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output,
|
||||
"%s registering sync ack recvd",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
@ -116,9 +128,15 @@ int orte_routed_base_process_callback(orte_jobid_t job, opal_buffer_t *buffer)
|
||||
char *rml_uri;
|
||||
orte_vpid_t vpid;
|
||||
int rc;
|
||||
|
||||
|
||||
if (ORTE_JOB_FAMILY(job) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
|
||||
/* came from singleton - don't process it */
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* lookup the job object for this process */
|
||||
if (NULL == (jdata = orte_get_job_data_object(job))) {
|
||||
/* came from my job family - this is an error */
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user