1
1

Allow singletons to use ompi-server for rendezvous via pubsub as well as comm_spawn without starting their own local daemons

This commit was SVN r23384.
Этот коммит содержится в:
Ralph Castain 2010-07-13 06:33:07 +00:00
родитель eee7541ae7
Коммит 570d19106b
4 изменённых файлов: 175 добавлений и 56 удалений

Просмотреть файл

@ -58,6 +58,7 @@
#include "orte/util/proc_info.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rml/rml.h"
#include "orte/orted/orted.h"
#include "orte/runtime/runtime.h"
#include "orte/runtime/orte_globals.h"
@ -186,11 +187,11 @@ int main(int argc, char *argv[])
tmp_env_var = NULL; /* Silence compiler warning */
#endif
/* Perform the standard init, but flag that we are a tool
* so that we only open up the communications infrastructure. No
* session directories will be created.
*/
if (ORTE_SUCCESS != (ret = orte_init(&argc, &argv, ORTE_PROC_TOOL))) {
/* don't want session directories */
orte_create_session_dirs = false;
/* Perform the standard init, but flag that we are an HNP */
if (ORTE_SUCCESS != (ret = orte_init(&argc, &argv, ORTE_PROC_HNP))) {
fprintf(stderr, "ompi-server: failed to initialize -- aborting\n");
exit(1);
}
@ -228,6 +229,15 @@ int main(int argc, char *argv[])
exit(1);
}
/* setup to listen for commands sent specifically to me */
ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON,
ORTE_RML_NON_PERSISTENT, orte_daemon_recv, NULL);
if (ret != ORTE_SUCCESS && ret != ORTE_ERR_NOT_IMPLEMENTED) {
ORTE_ERROR_LOG(ret);
orte_finalize();
exit(1);
}
/* Set signal handlers to catch kill signals so we can properly clean up
* after ourselves.
*/

Просмотреть файл

@ -32,6 +32,7 @@
#include <signal.h>
#include <errno.h>
#include "opal/hash_string.h"
#include "opal/util/argv.h"
#include "opal/util/path.h"
#include "opal/util/opal_sos.h"
@ -94,51 +95,137 @@ orte_ess_base_module_t orte_ess_singleton_module = {
static int rte_init(void)
{
int rc;
char *server_uri, *param;
uint16_t jobfam;
uint32_t hash32;
uint32_t bias;
/* run the prolog */
if (ORTE_SUCCESS != (rc = orte_ess_base_std_prolog())) {
ORTE_ERROR_LOG(rc);
return rc;
}
/*
* If we are the selected module, then we must be a singleton
* as it means that no other method for discovering a name
* could be found. In this case, we need to start a daemon that
* can support our operation. We must do this for two reasons:
*
* (1) if we try to play the role of the HNP, then any child processes
* we might start via comm_spawn will rely on us for all ORTE-level
* support. However, we can only progress those requests when the
* the application calls into the OMPI/ORTE library! Thus, if this
* singleton just does computation, the other processes will "hang"
* in any calls into the ORTE layer that communicate with the HNP -
* and most calls on application procs *do*.
*
* (2) daemons are used to communicate messages for administrative
* purposes in a broadcast-like manner. Thus, daemons are expected
* to be able to interpret specific commands. Our application process
* doesn't have any idea how to handle those commands, thus causing
* the entire ORTE administrative system to break down.
*
* For those reasons, we choose to fork/exec a daemon at this time
* and then reconnect ourselves to it. We could just "fork" and declare
* the child to be a daemon, but that would require we place *all* of the
* daemon command processing code in the ORTE library, do some strange
* mojo in a few places, etc. This doesn't seem worth it, so we'll just
* do the old fork/exec here
*
* Note that Windows-based systems have to do their own special trick as
* they don't support fork/exec. So we have to use a giant "if" here to
* protect the Windows world. To make the results more readable, we put
* the whole mess in a separate function below
*/
if (ORTE_SUCCESS != (rc= fork_hnp())) {
/* if this didn't work, then we cannot support operation any further.
* Abort the system and tell orte_init to exit
*/
ORTE_ERROR_LOG(rc);
return rc;
/* look for the ompi-server MCA param */
mca_base_param_reg_string_name("orte", "server",
"Server to be used as HNP - [file|FILE]:<filename> or just uri",
false, false, NULL, &server_uri);
if (NULL != server_uri) {
/* we are going to connect to a server HNP */
if (0 == strncmp(server_uri, "file", strlen("file")) ||
0 == strncmp(server_uri, "FILE", strlen("FILE"))) {
char input[1024], *filename;
FILE *fp;
/* it is a file - get the filename */
filename = strchr(server_uri, ':');
if (NULL == filename) {
/* filename is not correctly formatted */
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-bad", true,
"singleton", server_uri);
return ORTE_ERROR;
}
++filename; /* space past the : */
if (0 >= strlen(filename)) {
/* they forgot to give us the name! */
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-missing", true,
"singleton", server_uri);
return ORTE_ERROR;
}
/* open the file and extract the uri */
fp = fopen(filename, "r");
if (NULL == fp) { /* can't find or read file! */
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-access", true,
"singleton", server_uri);
return ORTE_ERROR;
}
if (NULL == fgets(input, 1024, fp)) {
/* something malformed about file */
fclose(fp);
orte_show_help("help-orterun.txt", "orterun:ompi-server-file-bad", true,
"singleton", server_uri, "singleton");
return ORTE_ERROR;
}
fclose(fp);
input[strlen(input)-1] = '\0'; /* remove newline */
orte_process_info.my_hnp_uri = strdup(input);
} else {
orte_process_info.my_hnp_uri = strdup(server_uri);
}
/* save the daemon uri - we will process it later */
orte_process_info.my_daemon_uri = strdup(orte_process_info.my_hnp_uri);
/* indicate we are a singleton so orte_init knows what to do */
orte_process_info.proc_type |= ORTE_PROC_SINGLETON;
/* for convenience, push the pubsub version of this param into the environ */
asprintf(&param,"OMPI_MCA_pubsub_orte_server=%s",orte_process_info.my_hnp_uri);
putenv(param);
/* now define my own name */
/* hash the nodename */
OPAL_HASH_STR(orte_process_info.nodename, hash32);
bias = (uint32_t)orte_process_info.pid;
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output,
"ess:singleton: initial bias %ld nodename hash %lu",
(long)bias, (unsigned long)hash32));
/* fold in the bias */
hash32 = hash32 ^ bias;
/* now compress to 16-bits */
jobfam = (uint16_t)(((0x0000ffff & (0xffff0000 & hash32) >> 16)) ^ (0x0000ffff & hash32));
OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output,
"ess:singleton:: final jobfam %lu",
(unsigned long)jobfam));
/* set the name */
ORTE_PROC_MY_NAME->jobid = 0xffff0000 & ((uint32_t)jobfam << 16);
ORTE_PROC_MY_NAME->vpid = 0;
} else {
/*
* If we are the selected module, then we must be a singleton
* as it means that no other method for discovering a name
* could be found. In this case, we need to start a daemon that
* can support our operation. We must do this for two reasons:
*
* (1) if we try to play the role of the HNP, then any child processes
* we might start via comm_spawn will rely on us for all ORTE-level
* support. However, we can only progress those requests when the
* the application calls into the OMPI/ORTE library! Thus, if this
* singleton just does computation, the other processes will "hang"
* in any calls into the ORTE layer that communicate with the HNP -
* and most calls on application procs *do*.
*
* (2) daemons are used to communicate messages for administrative
* purposes in a broadcast-like manner. Thus, daemons are expected
* to be able to interpret specific commands. Our application process
* doesn't have any idea how to handle those commands, thus causing
* the entire ORTE administrative system to break down.
*
* For those reasons, we choose to fork/exec a daemon at this time
* and then reconnect ourselves to it. We could just "fork" and declare
* the child to be a daemon, but that would require we place *all* of the
* daemon command processing code in the ORTE library, do some strange
* mojo in a few places, etc. This doesn't seem worth it, so we'll just
* do the old fork/exec here
*
* Note that Windows-based systems have to do their own special trick as
* they don't support fork/exec. So we have to use a giant "if" here to
* protect the Windows world. To make the results more readable, we put
* the whole mess in a separate function below
*/
if (ORTE_SUCCESS != (rc= fork_hnp())) {
/* if this didn't work, then we cannot support operation any further.
* Abort the system and tell orte_init to exit
*/
ORTE_ERROR_LOG(rc);
return rc;
}
}
orte_process_info.num_procs = 1;
@ -199,14 +286,14 @@ static int fork_hnp(void)
int rc;
/* A pipe is used to communicate between the parent and child to
indicate whether the exec ultimately succeeded or failed. The
child sets the pipe to be close-on-exec; the child only ever
writes anything to the pipe if there is an error (e.g.,
executable not found, exec() fails, etc.). The parent does a
blocking read on the pipe; if the pipe closed with no data,
then the exec() succeeded. If the parent reads something from
the pipe, then the child was letting us know that it failed.
*/
indicate whether the exec ultimately succeeded or failed. The
child sets the pipe to be close-on-exec; the child only ever
writes anything to the pipe if there is an error (e.g.,
executable not found, exec() fails, etc.). The parent does a
blocking read on the pipe; if the pipe closed with no data,
then the exec() succeeded. If the parent reads something from
the pipe, then the child was letting us know that it failed.
*/
if (pipe(p) < 0) {
ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES);
return ORTE_ERR_SYS_LIMITS_PIPES;
@ -318,7 +405,7 @@ static int fork_hnp(void)
exit(1);
} else {
/* I am the parent - wait to hear something back and
/* I am the parent - wait to hear something back and
* report results
*/
close(p[1]); /* parent closes the write - orted will write its contact info to it*/
@ -370,7 +457,7 @@ static int fork_hnp(void)
/* likewise, since this is also the HNP, set that uri too */
orte_process_info.my_hnp_uri = strdup(orted_uri);
/* indicate we are a singleton so orte_init knows what to do */
/* indicate we are a singleton so orte_init knows what to do */
orte_process_info.proc_type |= ORTE_PROC_SINGLETON;
/* all done - report success */
free(orted_uri);
@ -499,7 +586,7 @@ static int fork_hnp(void)
/* likewise, since this is also the HNP, set that uri too */
orte_process_info.my_hnp_uri = strdup(orted_uri);
/* indicate we are a singleton so orte_init knows what to do */
/* indicate we are a singleton so orte_init knows what to do */
orte_process_info.proc_type |= ORTE_PROC_SINGLETON;
/* all done - report success */
free(orted_uri);

Просмотреть файл

@ -2178,6 +2178,10 @@ int orte_odls_base_default_require_sync(orte_process_name_t *proc,
* come from a singleton
*/
if (!found) {
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls: registering sync on singleton %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc)));
child = OBJ_NEW(orte_odls_child_t);
if (ORTE_SUCCESS != (rc = opal_dss.copy((void**)&child->name, proc, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);

Просмотреть файл

@ -51,6 +51,11 @@ int orte_routed_base_register_sync(bool setup)
orte_daemon_cmd_flag_t command=ORTE_DAEMON_SYNC_BY_PROC;
char *rml_uri;
OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output,
"%s registering sync to daemon %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(ORTE_PROC_MY_DAEMON)));
/* we need to get the oob to establish
* the connection - the oob will leave the connection "alive"
* thereafter so we can communicate readily
@ -95,6 +100,9 @@ int orte_routed_base_register_sync(bool setup)
* gets serviced by the event library on the orted prior to the
* process exiting
*/
OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output,
"%s registering sync waiting for ack",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
sync_recvd = false;
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_SYNC,
ORTE_RML_NON_PERSISTENT, report_sync, NULL);
@ -105,6 +113,10 @@ int orte_routed_base_register_sync(bool setup)
ORTE_PROGRESSED_WAIT(sync_recvd, 0, 1);
OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output,
"%s registering sync ack recvd",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
return ORTE_SUCCESS;
}
@ -116,9 +128,15 @@ int orte_routed_base_process_callback(orte_jobid_t job, opal_buffer_t *buffer)
char *rml_uri;
orte_vpid_t vpid;
int rc;
if (ORTE_JOB_FAMILY(job) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
/* came from singleton - don't process it */
return ORTE_SUCCESS;
}
/* lookup the job object for this process */
if (NULL == (jdata = orte_get_job_data_object(job))) {
/* came from my job family - this is an error */
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}