- First of two or three patches, in orte/util/proc_info.h: adapt orte_process_info to orte_proc_info, and change orte_proc_info() to orte_proc_info_init().
- Compiled on linux-x86-64
- Discussed with Ralph

This commit was SVN r20739.
This commit is contained in:
Parent: 39796e2a56
Commit: 781caee0b6
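The rename is mechanical: the global ORTE process-info structure orte_process_info becomes orte_proc_info, and its setup routine orte_proc_info() becomes orte_proc_info_init(), with every call site in the diff below updated accordingly. As a rough illustration only, the sketch below shows what a caller inside the Open MPI tree of this era might look like after the change; the report_identity() function is hypothetical, while the header path, the nodename and pid fields, and the initializer name are taken from this commit.

/* Hypothetical call site (not part of this commit) illustrating the rename:
 *   before: orte_proc_info();        orte_process_info.nodename
 *   after:  orte_proc_info_init();   orte_proc_info.nodename
 * Assumes it is compiled inside the Open MPI/ORTE source tree of this era.
 */
#include <stdio.h>
#include "orte/util/proc_info.h"   /* declares orte_proc_info and orte_proc_info_init() */

static void report_identity(void)
{
    /* Populate the per-process info structure (formerly orte_proc_info()). */
    orte_proc_info_init();

    /* Read fields from the renamed global (formerly orte_process_info). */
    printf("[%s:%d]\n", orte_proc_info.nodename, (int) orte_proc_info.pid);
}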
@@ -175,8 +175,8 @@ int ompi_attr_create_predefined(void)
 }
 
 /* check the app_num - if it was set, then define it - otherwise, don't */
-if (orte_process_info.app_num >= 0) {
-ret = set_f(MPI_APPNUM, orte_process_info.app_num);
+if (orte_proc_info.app_num >= 0) {
+ret = set_f(MPI_APPNUM, orte_proc_info.app_num);
 }
 
 return ret;
@@ -177,8 +177,8 @@ static void backend_fatal_aggregate(char *type,
 arg = va_arg(arglist, char*);
 va_end(arglist);
 
-asprintf(&prefix, "[%s:%d]", orte_process_info.nodename,
-(int) orte_process_info.pid);
+asprintf(&prefix, "[%s:%d]", orte_proc_info.nodename,
+(int) orte_proc_info.pid);
 
 if (NULL != error_code) {
 err_msg = ompi_mpi_errnum_get_string(*error_code);
@@ -66,7 +66,7 @@ void mca_btl_base_error_no_nics(const char* transport,
 asprintf(&procid, "%s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
 
 orte_show_help("help-mpi-btl-base.txt", "btl:no-nics",
-true, procid, transport, orte_process_info.nodename,
+true, procid, transport, orte_proc_info.nodename,
 nic_name);
 free(procid);
 }
@@ -38,7 +38,7 @@ OMPI_DECLSPEC extern int mca_btl_base_out(const char*, ...);
 #define BTL_OUTPUT(args) \
 do { \
 mca_btl_base_out("[%s]%s[%s:%d:%s] ", \
-orte_process_info.nodename, \
+orte_proc_info.nodename, \
 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
 __FILE__, __LINE__, __func__); \
 mca_btl_base_out args; \
@@ -49,7 +49,7 @@ do { \
 #define BTL_ERROR(args) \
 do { \
 mca_btl_base_err("[%s]%s[%s:%d:%s] ", \
-orte_process_info.nodename, \
+orte_proc_info.nodename, \
 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
 __FILE__, __LINE__, __func__); \
 mca_btl_base_err args; \
@@ -61,7 +61,7 @@ do { \
 mca_btl_base_err("%s[%s:%d:%s] from %s ", \
 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
 __FILE__, __LINE__, __func__, \
-orte_process_info.nodename); \
+orte_proc_info.nodename); \
 if(proc && proc->proc_hostname) { \
 mca_btl_base_err("to: %s ", proc->proc_hostname); \
 } \
@@ -75,7 +75,7 @@ do { \
 do { \
 if(mca_btl_base_verbose > 0) { \
 mca_btl_base_err("[%s]%s[%s:%d:%s] ", \
-orte_process_info.nodename, \
+orte_proc_info.nodename, \
 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
 __FILE__, __LINE__, __func__); \
 mca_btl_base_err args; \
@@ -72,7 +72,7 @@ static int mca_btl_elan_add_procs( struct mca_btl_base_module_t* btl,
 FILE* file;
 ELAN_BASE* base;
 
-filename = opal_os_path( false, orte_process_info.proc_session_dir, "ELAN_ID", NULL );
+filename = opal_os_path( false, orte_proc_info.proc_session_dir, "ELAN_ID", NULL );
 file = fopen( filename, "w" );
 fprintf( file, "%s %d\n", ompi_proc_local_proc->proc_hostname, elan_btl->elan_position );
 
@@ -123,13 +123,13 @@ static void show_init_error(const char *file, int line,
 }
 
 orte_show_help("help-mpi-btl-openib.txt", "init-fail-no-mem",
-true, orte_process_info.nodename,
+true, orte_proc_info.nodename,
 file, line, func, dev, str_limit);
 
 if (NULL != str_limit) free(str_limit);
 } else {
 orte_show_help("help-mpi-btl-openib.txt", "init-fail-create-q",
-true, orte_process_info.nodename,
+true, orte_proc_info.nodename,
 file, line, func, strerror(errno), errno, dev);
 }
 }
@@ -293,7 +293,7 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
 case IBV_EVENT_SRQ_ERR:
 case IBV_EVENT_PORT_ERR:
 orte_show_help("help-mpi-btl-openib.txt", "of error event",
-true,orte_process_info.nodename, orte_process_info.pid,
+true,orte_proc_info.nodename, orte_proc_info.pid,
 event.event_type, openib_event_to_str(event.event_type),
 xrc_event ? "true" : "false");
 break;
@@ -311,7 +311,7 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
 break;
 default:
 orte_show_help("help-mpi-btl-openib.txt", "of unknown event",
-true,orte_process_info.nodename, orte_process_info.pid,
+true,orte_proc_info.nodename, orte_proc_info.pid,
 event.event_type, xrc_event ? "true" : "false");
 }
 ibv_ack_async_event(&event);
@@ -591,7 +591,7 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
 IB_DEFAULT_GID_PREFIX == subnet_id &&
 mca_btl_openib_component.warn_default_gid_prefix) {
 orte_show_help("help-mpi-btl-openib.txt", "default subnet prefix",
-true, orte_process_info.nodename);
+true, orte_proc_info.nodename);
 }
 
 lmc = (1 << ib_port_attr->lmc);
@@ -949,7 +949,7 @@ static int prepare_device_for_use(mca_btl_openib_device_t *device)
 "XRC on device without XRC support", true,
 mca_btl_openib_component.num_xrc_qps,
 ibv_get_device_name(device->ib_dev),
-orte_process_info.nodename);
+orte_proc_info.nodename);
 return OMPI_ERROR;
 }
 
@@ -1237,7 +1237,7 @@ static int setup_qps(void)
 if (0 == opal_argv_count(queues)) {
 orte_show_help("help-mpi-btl-openib.txt",
 "no qps in receive_queues", true,
-orte_process_info.nodename,
+orte_proc_info.nodename,
 mca_btl_openib_component.receive_queues);
 ret = OMPI_ERROR;
 goto error;
@@ -1256,7 +1256,7 @@ static int setup_qps(void)
 num_xrc_qps++;
 #else
 orte_show_help("help-mpi-btl-openib.txt", "No XRC support", true,
-orte_process_info.nodename,
+orte_proc_info.nodename,
 mca_btl_openib_component.receive_queues);
 ret = OMPI_ERR_NOT_AVAILABLE;
 goto error;
@@ -1264,7 +1264,7 @@ static int setup_qps(void)
 } else {
 orte_show_help("help-mpi-btl-openib.txt",
 "invalid qp type in receive_queues", true,
-orte_process_info.nodename,
+orte_proc_info.nodename,
 mca_btl_openib_component.receive_queues,
 queues[qp]);
 ret = OMPI_ERR_BAD_PARAM;
@@ -1276,7 +1276,7 @@ static int setup_qps(void)
 and SRQ */
 if (num_xrc_qps > 0 && (num_pp_qps > 0 || num_srq_qps > 0)) {
 orte_show_help("help-mpi-btl-openib.txt", "XRC with PP or SRQ", true,
-orte_process_info.nodename,
+orte_proc_info.nodename,
 mca_btl_openib_component.receive_queues);
 ret = OMPI_ERR_BAD_PARAM;
 goto error;
@@ -1285,7 +1285,7 @@ static int setup_qps(void)
 /* Current XRC implementation can't used with btls_per_lid > 1 */
 if (num_xrc_qps > 0 && mca_btl_openib_component.btls_per_lid > 1) {
 orte_show_help("help-mpi-btl-openib.txt", "XRC with BTLs per LID",
-true, orte_process_info.nodename,
+true, orte_proc_info.nodename,
 mca_btl_openib_component.receive_queues, num_xrc_qps);
 ret = OMPI_ERR_BAD_PARAM;
 goto error;
@@ -1312,7 +1312,7 @@ static int setup_qps(void)
 if (count < 3 || count > 6) {
 orte_show_help("help-mpi-btl-openib.txt",
 "invalid pp qp specification", true,
-orte_process_info.nodename, queues[qp]);
+orte_proc_info.nodename, queues[qp]);
 ret = OMPI_ERR_BAD_PARAM;
 goto error;
 }
@@ -1343,7 +1343,7 @@ static int setup_qps(void)
 if (count < 3 || count > 5) {
 orte_show_help("help-mpi-btl-openib.txt",
 "invalid srq specification", true,
-orte_process_info.nodename, queues[qp]);
+orte_proc_info.nodename, queues[qp]);
 ret = OMPI_ERR_BAD_PARAM;
 goto error;
 }
@@ -1367,7 +1367,7 @@ static int setup_qps(void)
 
 if (rd_num <= rd_low) {
 orte_show_help("help-mpi-btl-openib.txt", "rd_num must be > rd_low",
-true, orte_process_info.nodename, queues[qp]);
+true, orte_proc_info.nodename, queues[qp]);
 ret = OMPI_ERR_BAD_PARAM;
 goto error;
 }
@@ -1388,21 +1388,21 @@ static int setup_qps(void)
 if (max_qp_size < max_size_needed) {
 orte_show_help("help-mpi-btl-openib.txt",
 "biggest qp size is too small", true,
-orte_process_info.nodename, max_qp_size,
+orte_proc_info.nodename, max_qp_size,
 max_size_needed);
 ret = OMPI_ERR_BAD_PARAM;
 goto error;
 } else if (max_qp_size > max_size_needed) {
 orte_show_help("help-mpi-btl-openib.txt",
 "biggest qp size is too big", true,
-orte_process_info.nodename, max_qp_size,
+orte_proc_info.nodename, max_qp_size,
 max_size_needed);
 }
 
 if (mca_btl_openib_component.ib_free_list_max > 0 &&
 min_freelist_size > mca_btl_openib_component.ib_free_list_max) {
 orte_show_help("help-mpi-btl-openib.txt", "freelist too small", true,
-orte_process_info.nodename,
+orte_proc_info.nodename,
 mca_btl_openib_component.ib_free_list_max,
 min_freelist_size);
 ret = OMPI_ERR_BAD_PARAM;
@@ -1487,7 +1487,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
 if (mca_btl_openib_component.warn_no_device_params_found) {
 orte_show_help("help-mpi-btl-openib.txt",
 "no device params found", true,
-orte_process_info.nodename,
+orte_proc_info.nodename,
 ibv_get_device_name(device->ib_dev),
 device->ib_dev_attr.vendor_id,
 device->ib_dev_attr.vendor_part_id);
@@ -1593,7 +1593,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
 #endif
 if (NULL == cq) {
 orte_show_help("help-mpi-btl-openib.txt", "init-fail-create-q",
-true, orte_process_info.nodename,
+true, orte_proc_info.nodename,
 __FILE__, __LINE__, "ibv_create_cq",
 strerror(errno), errno,
 ibv_get_device_name(device->ib_dev));
@@ -1649,7 +1649,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
 mca_btl_openib_component.receive_queues)) {
 orte_show_help("help-mpi-btl-openib.txt",
 "conflicting receive_queues", true,
-orte_process_info.nodename,
+orte_proc_info.nodename,
 ibv_get_device_name(device->ib_dev),
 device->ib_dev_attr.vendor_id,
 device->ib_dev_attr.vendor_part_id,
@@ -1699,7 +1699,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
 "XRC on device without XRC support", true,
 mca_btl_openib_component.num_xrc_qps,
 ibv_get_device_name(device->ib_dev),
-orte_process_info.nodename);
+orte_proc_info.nodename);
 ret = OMPI_SUCCESS;
 goto error;
 }
@@ -1823,7 +1823,7 @@ error:
 if (OMPI_SUCCESS != ret) {
 orte_show_help("help-mpi-btl-openib.txt",
 "error in device init", true,
-orte_process_info.nodename,
+orte_proc_info.nodename,
 ibv_get_device_name(device->ib_dev));
 }
 
@@ -2086,7 +2086,7 @@ btl_openib_component_init(int *num_btl_modules,
 ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) {
 orte_show_help("help-mpi-btl-openib.txt",
 "ptmalloc2 with no threads", true,
-orte_process_info.nodename);
+orte_proc_info.nodename);
 goto no_btls;
 }
 #endif
@@ -2204,7 +2204,7 @@ btl_openib_component_init(int *num_btl_modules,
 if (mca_btl_openib_component.want_fork_support > 0) {
 orte_show_help("help-mpi-btl-openib.txt",
 "ibv_fork_init fail", true,
-orte_process_info.nodename);
+orte_proc_info.nodename);
 goto no_btls;
 }
 }
@@ -2313,7 +2313,7 @@ btl_openib_component_init(int *num_btl_modules,
 free(dev_sorted);
 if (!found) {
 orte_show_help("help-mpi-btl-openib.txt", "no devices right type",
-true, orte_process_info.nodename,
+true, orte_proc_info.nodename,
 ((BTL_OPENIB_DT_IB == mca_btl_openib_component.device_type) ?
 "InfiniBand" :
 (BTL_OPENIB_DT_IWARP == mca_btl_openib_component.device_type) ?
@@ -2330,7 +2330,7 @@ btl_openib_component_init(int *num_btl_modules,
 mca_btl_openib_component.warn_nonexistent_if) {
 char *str = opal_argv_join(mca_btl_openib_component.if_list, ',');
 orte_show_help("help-mpi-btl-openib.txt", "nonexistent port",
-true, orte_process_info.nodename,
+true, orte_proc_info.nodename,
 ((NULL != mca_btl_openib_component.if_include) ?
 "in" : "ex"), str);
 free(str);
@@ -2338,7 +2338,7 @@ btl_openib_component_init(int *num_btl_modules,
 
 if(0 == mca_btl_openib_component.ib_num_btls) {
 orte_show_help("help-mpi-btl-openib.txt",
-"no active ports found", true, orte_process_info.nodename);
+"no active ports found", true, orte_proc_info.nodename);
 goto no_btls;
 }
 
@@ -2385,7 +2385,7 @@ btl_openib_component_init(int *num_btl_modules,
 if (OMPI_SUCCESS != ret) {
 orte_show_help("help-mpi-btl-openib.txt",
 "error in device init", true,
-orte_process_info.nodename,
+orte_proc_info.nodename,
 ibv_get_device_name(device->ib_dev));
 goto no_btls;
 }
@@ -2924,24 +2924,24 @@ error:
 BTL_OPENIB_QP_TYPE_PP(qp) ?
 "pp rnr retry exceeded" :
 "srq rnr retry exceeded", true,
-orte_process_info.nodename, device_name,
+orte_proc_info.nodename, device_name,
 peer_hostname);
 orte_notifier.help(ORTE_NOTIFIER_INFRA, ORTE_ERR_COMM_FAILURE,
 "help-mpi-btl-openib.txt",
 BTL_OPENIB_QP_TYPE_PP(qp) ?
 "pp rnr retry exceeded" :
 "srq rnr retry exceeded",
-orte_process_info.nodename, device_name,
+orte_proc_info.nodename, device_name,
 peer_hostname);
 } else if (IBV_WC_RETRY_EXC_ERR == wc->status) {
 orte_show_help("help-mpi-btl-openib.txt",
 "pp retry exceeded", true,
-orte_process_info.nodename,
+orte_proc_info.nodename,
 device_name, peer_hostname);
 orte_notifier.help(ORTE_NOTIFIER_INFRA, ORTE_ERR_COMM_FAILURE,
 "help-mpi-btl-openib.txt",
 "pp retry exceeded",
-orte_process_info.nodename,
+orte_proc_info.nodename,
 device_name, peer_hostname);
 }
 }
@@ -1087,7 +1087,7 @@ void *mca_btl_openib_endpoint_invoke_error(void *context)
 if (NULL == btl || NULL == btl->error_cb) {
 orte_show_help("help-mpi-btl-openib.txt",
 "cannot raise btl error", true,
-orte_process_info.nodename,
+orte_proc_info.nodename,
 __FILE__, __LINE__);
 exit(1);
 }
@@ -178,7 +178,7 @@ int btl_openib_register_mca_params(void)
 if (0 != ival) {
 orte_show_help("help-mpi-btl-openib.txt",
 "ibv_fork requested but not supported", true,
-orte_process_info.nodename);
+orte_proc_info.nodename);
 return OMPI_ERROR;
 }
 }
@@ -208,7 +208,7 @@ int btl_openib_register_mca_params(void)
 } else {
 orte_show_help("help-mpi-btl-openib.txt",
 "ibv_fork requested but not supported", true,
-orte_process_info.nodename);
+orte_proc_info.nodename);
 return OMPI_ERROR;
 }
 free(str);
@@ -458,7 +458,7 @@ int btl_openib_register_mca_params(void)
 64, &ival, REGINT_GE_ZERO));
 if(ival <= 1 || (ival & (ival - 1))) {
 orte_show_help("help-mpi-btl-openib.txt", "wrong buffer alignment",
-true, ival, orte_process_info.nodename, 64);
+true, ival, orte_proc_info.nodename, 64);
 mca_btl_openib_component.buffer_alignment = 64;
 } else {
 mca_btl_openib_component.buffer_alignment = (uint32_t) ival;
@@ -42,7 +42,7 @@ int mca_btl_openib_open_xrc_domain(struct mca_btl_openib_device_t *device)
 dev_name = ibv_get_device_name(device->ib_dev);
 len = asprintf(&xrc_file_name,
 "%s"OPAL_PATH_SEP"openib_xrc_domain_%s",
-orte_process_info.job_session_dir, dev_name);
+orte_proc_info.job_session_dir, dev_name);
 if (0 > len) {
 BTL_ERROR(("Failed to allocate memomry for XRC file name\n",
 strerror(errno)));
@@ -121,7 +121,7 @@ int ompi_btl_openib_connect_base_register(void)
 if (NULL == all[i]) {
 orte_show_help("help-mpi-btl-openib-cpc-base.txt",
 "cpc name not found", true,
-"include", orte_process_info.nodename,
+"include", orte_proc_info.nodename,
 "include", cpc_include, temp[j],
 all_cpc_names);
 opal_argv_free(temp);
@@ -147,7 +147,7 @@ int ompi_btl_openib_connect_base_register(void)
 if (NULL == all[i]) {
 orte_show_help("help-mpi-btl-openib-cpc-base.txt",
 "cpc name not found", true,
-"exclude", orte_process_info.nodename,
+"exclude", orte_proc_info.nodename,
 "exclude", cpc_exclude, temp[j],
 all_cpc_names);
 opal_argv_free(temp);
@@ -292,7 +292,7 @@ int ompi_btl_openib_connect_base_select_for_local_port(mca_btl_openib_module_t *
 if (0 == cpc_index) {
 orte_show_help("help-mpi-btl-openib-cpc-base.txt",
 "no cpcs for port", true,
-orte_process_info.nodename,
+orte_proc_info.nodename,
 ibv_get_device_name(btl->device->ib_dev),
 msg);
 free(cpcs);
@@ -923,7 +923,7 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
 if (init_attr.cap.max_inline_data < req_inline) {
 endpoint->qps[qp].ib_inline_max = init_attr.cap.max_inline_data;
 orte_show_help("help-mpi-btl-openib-cpc-base.txt",
-"inline truncated", orte_process_info.nodename,
+"inline truncated", orte_proc_info.nodename,
 ibv_get_device_name(openib_btl->device->ib_dev),
 req_inline, init_attr.cap.max_inline_data);
 } else {
@@ -2314,7 +2314,7 @@ static int request_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
 if (IBV_WC_RESP_TIMEOUT_ERR != event->param.send_status) {
 orte_show_help("help-mpi-btl-openib-cpc-ibcm.txt",
 "unhandled error", true,
-"request", orte_process_info.nodename,
+"request", orte_proc_info.nodename,
 event->param.send_status);
 } else {
 ibcm_request_t *req;
@@ -2325,7 +2325,7 @@ static int request_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
 if (NULL == req) {
 orte_show_help("help-mpi-btl-openib-cpc-ibcm.txt",
 "timeout not found", true,
-"request", orte_process_info.nodename);
+"request", orte_proc_info.nodename);
 } else {
 endpoint = req->endpoint;
 }
@@ -2346,7 +2346,7 @@ static int reply_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
 if (IBV_WC_RESP_TIMEOUT_ERR != event->param.send_status) {
 orte_show_help("help-mpi-btl-openib-cpc-ibcm.txt",
 "unhandled error", true,
-"reply", orte_process_info.nodename,
+"reply", orte_proc_info.nodename,
 event->param.send_status);
 } else {
 ibcm_reply_t *rep;
@@ -2357,7 +2357,7 @@ static int reply_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
 if (NULL == rep) {
 orte_show_help("help-mpi-btl-openib-cpc-ibcm.txt",
 "timeout not found", true,
-"reply", orte_process_info.nodename);
+"reply", orte_proc_info.nodename);
 } else {
 endpoint = rep->endpoint;
 }
@@ -465,7 +465,7 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
 if (init_attr.cap.max_inline_data < req_inline) {
 endpoint->qps[qp].ib_inline_max = init_attr.cap.max_inline_data;
 orte_show_help("help-mpi-btl-openib-cpc-base.txt",
-"inline truncated", true, orte_process_info.nodename,
+"inline truncated", true, orte_proc_info.nodename,
 ibv_get_device_name(openib_btl->device->ib_dev),
 req_inline, init_attr.cap.max_inline_data);
 } else {
@@ -426,7 +426,7 @@ static int rdmacm_setup_qp(rdmacm_contents_t *contents,
 endpoint->qps[qpnum].ib_inline_max = attr.cap.max_inline_data;
 orte_show_help("help-mpi-btl-openib-cpc-base.txt",
 "inline truncated", true,
-orte_process_info.nodename,
+orte_proc_info.nodename,
 ibv_get_device_name(contents->openib_btl->device->ib_dev),
 req_inline, attr.cap.max_inline_data);
 } else {
@@ -722,14 +722,14 @@ static void *show_help_cant_find_endpoint(void *context)
 msg = stringify(c->peer_ip_addr);
 orte_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
 "could not find matching endpoint", true,
-orte_process_info.nodename,
+orte_proc_info.nodename,
 c->device_name,
 c->peer_tcp_port);
 free(msg);
 } else {
 orte_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
 "could not find matching endpoint", true,
-orte_process_info.nodename,
+orte_proc_info.nodename,
 "<unknown>", "<unknown>", -1);
 }
 free(context);
@@ -1421,7 +1421,7 @@ static void *show_help_rdmacm_event_error(void *c)
 if (RDMA_CM_EVENT_DEVICE_REMOVAL == event->event) {
 orte_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
 "rdma cm device removal", true,
-orte_process_info.nodename,
+orte_proc_info.nodename,
 ibv_get_device_name(event->id->verbs->device));
 } else {
 const char *device = "Unknown";
@@ -1432,7 +1432,7 @@ static void *show_help_rdmacm_event_error(void *c)
 }
 orte_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
 "rdma cm event error", true,
-orte_process_info.nodename,
+orte_proc_info.nodename,
 device,
 rdma_event_str(event->event),
 context->endpoint->endpoint_proc->proc_ompi->proc_hostname);
@@ -411,7 +411,7 @@ static int xoob_send_qp_create (mca_btl_base_endpoint_t* endpoint)
 if (qp_init_attr.cap.max_inline_data < req_inline) {
 endpoint->qps[0].ib_inline_max = qp_init_attr.cap.max_inline_data;
 orte_show_help("help-mpi-btl-openib-cpc-base.txt",
-"inline truncated", orte_process_info.nodename,
+"inline truncated", orte_proc_info.nodename,
 ibv_get_device_name(openib_btl->device->ib_dev),
 req_inline, qp_init_attr.cap.max_inline_data);
 } else {
@@ -233,8 +233,8 @@ static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n)
 
 /* set file name */
 if(asprintf(&sm_ctl_file, "%s"OPAL_PATH_SEP"shared_mem_btl_module.%s",
-orte_process_info.job_session_dir,
-orte_process_info.nodename) < 0)
+orte_proc_info.job_session_dir,
+orte_proc_info.nodename) < 0)
 return OMPI_ERR_OUT_OF_RESOURCE;
 
 /* Pass in a data segment alignment of 0 to get no data
@@ -371,7 +371,7 @@ create_sm_endpoint(int local_proc, struct ompi_proc_t *proc)
 OBJ_CONSTRUCT(&ep->pending_sends, opal_list_t);
 #if OMPI_ENABLE_PROGRESS_THREADS == 1
 sprintf(path, "%s"OPAL_PATH_SEP"sm_fifo.%lu",
-orte_process_info.job_session_dir,
+orte_proc_info.job_session_dir,
 (unsigned long)proc->proc_name.vpid);
 ep->fifo_fd = open(path, O_WRONLY);
 if(ep->fifo_fd < 0) {
@@ -848,7 +848,7 @@ int mca_btl_sm_ft_event(int state) {
 opal_crs_base_metadata_write_token(NULL, CRS_METADATA_TOUCH, mca_btl_sm_component.mmap_file->map_path);
 
 /* Record the job session directory */
-opal_crs_base_metadata_write_token(NULL, CRS_METADATA_MKDIR, orte_process_info.job_session_dir);
+opal_crs_base_metadata_write_token(NULL, CRS_METADATA_MKDIR, orte_proc_info.job_session_dir);
 }
 }
 else if(OPAL_CRS_CONTINUE == state) {
@@ -868,7 +868,7 @@ int mca_btl_sm_ft_event(int state) {
 OPAL_CRS_RESTART_PRE == state) {
 if( NULL != mca_btl_sm_component.mmap_file ) {
 /* Add session directory */
-opal_crs_base_cleanup_append(orte_process_info.job_session_dir, true);
+opal_crs_base_cleanup_append(orte_proc_info.job_session_dir, true);
 /* Add shared memory file */
 opal_crs_base_cleanup_append(mca_btl_sm_component.mmap_file->map_path, false);
 }
@@ -272,7 +272,7 @@ mca_btl_base_module_t** mca_btl_sm_component_init(
 #if OMPI_ENABLE_PROGRESS_THREADS == 1
 /* create a named pipe to receive events */
 sprintf( mca_btl_sm_component.sm_fifo_path,
-"%s"OPAL_PATH_SEP"sm_fifo.%lu", orte_process_info.job_session_dir,
+"%s"OPAL_PATH_SEP"sm_fifo.%lu", orte_proc_info.job_session_dir,
 (unsigned long)ORTE_PROC_MY_NAME->vpid );
 if(mkfifo(mca_btl_sm_component.sm_fifo_path, 0660) < 0) {
 opal_output(0, "mca_btl_sm_component_init: mkfifo failed with errno=%d\n",errno);
@@ -803,7 +803,7 @@ static int mca_btl_udapl_assign_netmask(mca_btl_udapl_module_t* udapl_btl)
 
 BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
 ("help-mpi-btl-udapl.txt", "interface not found",
-true, orte_process_info.nodename, btl_addr_string));
+true, orte_proc_info.nodename, btl_addr_string));
 
 return OMPI_ERROR;
 }
@@ -817,7 +817,7 @@ static int mca_btl_udapl_assign_netmask(mca_btl_udapl_module_t* udapl_btl)
 
 BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
 ("help-mpi-btl-udapl.txt", "netmask not found",
-true, orte_process_info.nodename, btl_addr_string));
+true, orte_proc_info.nodename, btl_addr_string));
 
 return OMPI_ERROR;
 }
@@ -831,7 +831,7 @@ static int mca_btl_udapl_assign_netmask(mca_btl_udapl_module_t* udapl_btl)
 /* current uDAPL BTL does not support IPv6 */
 BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
 ("help-mpi-btl-udapl.txt", "IPv4 only",
-true, orte_process_info.nodename));
+true, orte_proc_info.nodename));
 
 return OMPI_ERROR;
 }
@@ -418,7 +418,7 @@ static int mca_btl_udapl_modify_ia_list(DAT_COUNT *num_info_entries,
 char *str = opal_argv_join(mca_btl_udapl_component.if_list, ',');
 BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
 ("help-mpi-btl-udapl.txt", "nonexistent entry",
-true, orte_process_info.nodename,
+true, orte_proc_info.nodename,
 ((NULL != mca_btl_udapl_component.if_include) ?
 "in" : "ex"), str));
 free(str);
@@ -253,14 +253,14 @@ static int mca_btl_udapl_proc_address_match(
 /* current uDAPL BTL only supports IPv4 */
 BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
 ("help-mpi-btl-udapl.txt", "IPv4 only",
-true, orte_process_info.nodename));
+true, orte_proc_info.nodename));
 return OMPI_ERROR;
 }
 
 if (MCA_BTL_UDAPL_INVALID_PEER_ADDR_IDX == *peer_addr_idx) {
 BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
 ("help-mpi-btl-udapl.txt", "no network match",
-true, btl_addr_string, orte_process_info.nodename,
+true, btl_addr_string, orte_proc_info.nodename,
 peer_proc->proc_ompi->proc_hostname));
 return OMPI_ERR_OUT_OF_RESOURCE;
 }
@@ -518,8 +518,8 @@ static int bootstrap_init(void)
 if (NULL == mca_coll_sm_component.sm_bootstrap_filename) {
 return OMPI_ERROR;
 }
-orte_proc_info();
-fullpath = opal_os_path( false, orte_process_info.job_session_dir,
+orte_proc_info_init();
+fullpath = opal_os_path( false, orte_proc_info.job_session_dir,
 mca_coll_sm_component.sm_bootstrap_filename, NULL );
 if (NULL == fullpath) {
 return OMPI_ERR_OUT_OF_RESOURCE;
@@ -233,7 +233,7 @@ static int allocate_shared_file(size_t size, char **file_name,
 */
 unique_comm_id=(int)getpid();
 len=asprintf(&f_name,
-"%s"OPAL_PATH_SEP"sm_coll_v2_%0d_%0d",orte_process_info.job_session_dir,
+"%s"OPAL_PATH_SEP"sm_coll_v2_%0d_%0d",orte_proc_info.job_session_dir,
 ompi_comm_get_cid(comm),unique_comm_id);
 if( 0 > len ) {
 return OMPI_ERROR;
@@ -318,7 +318,7 @@ static int allocate_shared_file(size_t size, char **file_name,
 * communicators, that could have the same communicator id
 */
 len=asprintf(&f_name,
-"%s"OPAL_PATH_SEP"sm_coll_v2_%0d_%0d",orte_process_info.job_session_dir,
+"%s"OPAL_PATH_SEP"sm_coll_v2_%0d_%0d",orte_proc_info.job_session_dir,
 ompi_comm_get_cid(comm),unique_comm_id);
 if( 0 > len ) {
 return OMPI_ERROR;
@@ -987,8 +987,8 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
 /* set file name */
 /*
 len=asprintf(&(sm_module->coll_sm2_file_name),
-"%s"OPAL_PATH_SEP"sm_coll_v2%s_%0d\0",orte_process_info.job_session_dir,
-orte_process_info.nodename,ompi_comm_get_cid(comm));
+"%s"OPAL_PATH_SEP"sm_coll_v2%s_%0d\0",orte_proc_info.job_session_dir,
+orte_proc_info.nodename,ompi_comm_get_cid(comm));
 if( 0 > len ) {
 goto CLEANUP;
 }
@@ -177,7 +177,7 @@ int mca_coll_sync_module_enable(mca_coll_base_module_t *module,
 return OMPI_SUCCESS;
 } else {
 orte_show_help("help-coll-sync.txt", "missing collective", true,
-orte_process_info.nodename,
+orte_proc_info.nodename,
 mca_coll_sync_component.priority, msg);
 return OMPI_ERR_NOT_FOUND;
 }
@@ -6470,10 +6470,10 @@ static void traffic_message_dump_msg_list(opal_list_t *msg_list, bool is_drain)
 
 static void traffic_message_dump_peer(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, char * msg, bool root_only)
 {
-if( root_only && orte_process_info.my_name.vpid != 0 ) {
+if( root_only && orte_proc_info.my_name.vpid != 0 ) {
 return;
 } else {
-sleep(orte_process_info.my_name.vpid * 2);
+sleep(orte_proc_info.my_name.vpid * 2);
 }
 
 opal_output(0, "------------- %s ---------------------------------", msg);
@@ -770,7 +770,7 @@ static int open_port(char *port_name, orte_rml_tag_t given_tag)
 
 OPAL_THREAD_LOCK(&ompi_dpm_port_mutex);
 
-if (NULL == orte_process_info.my_hnp_uri) {
+if (NULL == orte_proc_info.my_hnp_uri) {
 rc = ORTE_ERR_NOT_AVAILABLE;
 ORTE_ERROR_LOG(rc);
 goto cleanup;
@@ -790,7 +790,7 @@ static int open_port(char *port_name, orte_rml_tag_t given_tag)
 }
 
 
-len = strlen(orte_process_info.my_hnp_uri) + strlen(rml_uri) + strlen(tag);
+len = strlen(orte_proc_info.my_hnp_uri) + strlen(rml_uri) + strlen(tag);
 
 /* if the overall port name is too long, we abort */
 if (len > (MPI_MAX_PORT_NAME-1)) {
@@ -799,7 +799,7 @@ static int open_port(char *port_name, orte_rml_tag_t given_tag)
 }
 
 /* assemble the port name */
-snprintf(port_name, MPI_MAX_PORT_NAME, "%s+%s:%s", orte_process_info.my_hnp_uri, rml_uri, tag);
+snprintf(port_name, MPI_MAX_PORT_NAME, "%s+%s:%s", orte_proc_info.my_hnp_uri, rml_uri, tag);
 rc = OMPI_SUCCESS;
 
 cleanup:
@@ -137,7 +137,7 @@ mca_mpool_base_module_t* mca_mpool_base_module_create(
 } else {
 orte_show_help("help-mpool-base.txt", "leave pinned failed",
 true, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
-orte_process_info.nodename);
+orte_proc_info.nodename);
 return NULL;
 }
 
@@ -173,14 +173,14 @@ void mca_mpool_base_tree_print(void)
 ompi_debug_show_mpi_alloc_mem_leaks < 0) {
 orte_show_help("help-mpool-base.txt", "all mem leaks",
 true, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
-orte_process_info.nodename,
-orte_process_info.pid, leak_msg);
+orte_proc_info.nodename,
+orte_proc_info.pid, leak_msg);
 } else {
 int i = num_leaks - ompi_debug_show_mpi_alloc_mem_leaks;
 orte_show_help("help-mpool-base.txt", "some mem leaks",
 true, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
-orte_process_info.nodename,
-orte_process_info.pid, leak_msg, i,
+orte_proc_info.nodename,
+orte_proc_info.pid, leak_msg, i,
 (i > 1) ? "s were" : " was",
 (i > 1) ? "are" : "is");
 }
@@ -207,8 +207,8 @@ static mca_mpool_base_module_t* mca_mpool_sm_init(
 
 /* create initial shared memory mapping */
 len = asprintf( &file_name, "%s"OPAL_PATH_SEP"shared_mem_pool.%s",
-orte_process_info.job_session_dir,
-orte_process_info.nodename );
+orte_proc_info.job_session_dir,
+orte_proc_info.nodename );
 if ( 0 > len ) {
 free(mpool_module);
 return NULL;
@@ -131,8 +131,8 @@ int mca_mpool_sm_ft_event(int state) {
 if(OPAL_CRS_CHECKPOINT == state) {
 /* Record the shared memory filename */
 asprintf( &file_name, "%s"OPAL_PATH_SEP"shared_mem_pool.%s",
-orte_process_info.job_session_dir,
-orte_process_info.nodename );
+orte_proc_info.job_session_dir,
+orte_proc_info.nodename );
 opal_crs_base_metadata_write_token(NULL, CRS_METADATA_TOUCH, file_name);
 free(file_name);
 file_name = NULL;
@@ -141,7 +141,7 @@ int vprotocol_pessimist_sender_based_init(const char *mmapfile, size_t size)
 OBJ_CONSTRUCT(&sb.sb_sendreq, opal_list_t);
 #endif
 
-asprintf(&path, "%s"OPAL_PATH_SEP"%s", orte_process_info.proc_session_dir,
+asprintf(&path, "%s"OPAL_PATH_SEP"%s", orte_proc_info.proc_session_dir,
 mmapfile);
 if(OPAL_SUCCESS != sb_mmap_file_open(path))
 return OPAL_ERR_FILE_OPEN_FAILURE;
@@ -63,7 +63,7 @@ void ompi_proc_construct(ompi_proc_t* proc)
 * the arch of the remote nodes, we will have to set the convertors to the correct
 * architecture.
 */
-proc->proc_arch = orte_process_info.arch;
+proc->proc_arch = orte_proc_info.arch;
 proc->proc_convertor = ompi_mpi_local_convertor;
 OBJ_RETAIN( ompi_mpi_local_convertor );
 
@@ -99,7 +99,7 @@ int ompi_proc_init(void)
 OBJ_CONSTRUCT(&ompi_proc_lock, opal_mutex_t);
 
 /* create proc structures and find self */
-for( i = 0; i < orte_process_info.num_procs; i++ ) {
+for( i = 0; i < orte_proc_info.num_procs; i++ ) {
 ompi_proc_t *proc = OBJ_NEW(ompi_proc_t);
 opal_list_append(&ompi_proc_list, (opal_list_item_t*)proc);
 
@@ -108,8 +108,8 @@ int ompi_proc_init(void)
 if (i == ORTE_PROC_MY_NAME->vpid) {
 ompi_proc_local_proc = proc;
 proc->proc_flags = OPAL_PROC_ALL_LOCAL;
-proc->proc_hostname = orte_process_info.nodename;
-proc->proc_arch = orte_process_info.arch;
+proc->proc_hostname = orte_proc_info.nodename;
+proc->proc_arch = orte_proc_info.arch;
 } else {
 /* get the locality information */
 proc->proc_flags = orte_ess.proc_get_locality(&proc->proc_name);
@@ -146,14 +146,14 @@ int ompi_proc_set_arch(void)
 if (proc->proc_name.vpid != ORTE_PROC_MY_NAME->vpid) {
 proc->proc_arch = orte_ess.proc_get_arch(&proc->proc_name);
 /* if arch is different than mine, create a new convertor for this proc */
-if (proc->proc_arch != orte_process_info.arch) {
+if (proc->proc_arch != orte_proc_info.arch) {
 #if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
 OBJ_RELEASE(proc->proc_convertor);
 proc->proc_convertor = ompi_convertor_create(proc->proc_arch, 0);
 #else
 orte_show_help("help-mpi-runtime",
 "heterogeneous-support-unavailable",
-true, orte_process_info.nodename,
+true, orte_proc_info.nodename,
 proc->proc_hostname == NULL ? "<hostname unavailable>" :
 proc->proc_hostname);
 OPAL_THREAD_UNLOCK(&ompi_proc_lock);
@@ -353,21 +353,21 @@ int ompi_proc_refresh(void) {
 if (i == ORTE_PROC_MY_NAME->vpid) {
 ompi_proc_local_proc = proc;
 proc->proc_flags = OPAL_PROC_ALL_LOCAL;
-proc->proc_hostname = orte_process_info.nodename;
-proc->proc_arch = orte_process_info.arch;
+proc->proc_hostname = orte_proc_info.nodename;
+proc->proc_arch = orte_proc_info.arch;
 } else {
 proc->proc_flags = orte_ess.proc_get_locality(&proc->proc_name);
 proc->proc_hostname = orte_ess.proc_get_hostname(&proc->proc_name);
 proc->proc_arch = orte_ess.proc_get_arch(&proc->proc_name);
 /* if arch is different than mine, create a new convertor for this proc */
-if (proc->proc_arch != orte_process_info.arch) {
+if (proc->proc_arch != orte_proc_info.arch) {
 #if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
 OBJ_RELEASE(proc->proc_convertor);
 proc->proc_convertor = ompi_convertor_create(proc->proc_arch, 0);
 #else
 orte_show_help("help-mpi-runtime",
 "heterogeneous-support-unavailable",
-true, orte_process_info.nodename,
+true, orte_proc_info.nodename,
 proc->proc_hostname == NULL ? "<hostname unavailable>" :
 proc->proc_hostname);
 OPAL_THREAD_UNLOCK(&ompi_proc_lock);
@@ -539,7 +539,7 @@ ompi_proc_unpack(opal_buffer_t* buf,
 #else
 orte_show_help("help-mpi-runtime",
 "heterogeneous-support-unavailable",
-true, orte_process_info.nodename,
+true, orte_proc_info.nodename,
 new_hostname == NULL ? "<hostname unavailable>" :
 new_hostname);
 free(plist);
@@ -68,7 +68,7 @@ ompi_mpi_abort(struct ompi_communicator_t* comm,
 gethostname. */
 
 if (orte_initialized) {
-host = orte_process_info.nodename;
+host = orte_proc_info.nodename;
 } else {
 gethostname(hostname, sizeof(hostname));
 host = hostname;
@@ -135,7 +135,7 @@ static void warn_fork_cb(void)
 {
 if (ompi_mpi_initialized && !ompi_mpi_finalized && !fork_warning_issued) {
 orte_show_help("help-mpi-runtime.txt", "mpi_init:warn-fork", true,
-orte_process_info.nodename, getpid(),
+orte_proc_info.nodename, getpid(),
 ompi_mpi_comm_world.comm.c_my_rank);
 fork_warning_issued = true;
 }
@@ -341,7 +341,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
 }
 
 /* Setup ORTE - note that we are not a tool */
-orte_process_info.mpi_proc = true;
+orte_proc_info.mpi_proc = true;
 if (ORTE_SUCCESS != (ret = orte_init(ORTE_NON_TOOL))) {
 error = "ompi_mpi_init: orte_init failed";
 goto error;
@@ -698,7 +698,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
 if (ompi_mpi_show_mca_params) {
 ompi_show_all_mca_params(ompi_mpi_comm_world.comm.c_my_rank,
 nprocs,
-orte_process_info.nodename);
+orte_proc_info.nodename);
 }
 
 /* Do we need to wait for a debugger? */
@@ -275,9 +275,9 @@ void ompi_info::open_components()
 component_map["installdirs"] = &opal_installdirs_components;
 
 // ORTE frameworks
-// Set orte_process_info.hnp to true to force all frameworks to
+// Set orte_proc_info.hnp to true to force all frameworks to
 // open components
-orte_process_info.hnp = true;
+orte_proc_info.hnp = true;
 
 if (ORTE_SUCCESS != orte_errmgr_base_open()) {
 goto error;
@@ -94,7 +94,7 @@ int orte_errmgr_default_component_close(void)
 int orte_errmgr_default_component_query(mca_base_module_t **module, int *priority)
 {
 /* If we are not an HNP, then don't pick us! */
-if (!orte_process_info.hnp) {
+if (!orte_proc_info.hnp) {
 /* don't take me! */
 *module = NULL;
 return ORTE_ERROR;
@ -85,13 +85,13 @@ static int rte_init(char flags)
|
|||||||
/* if I am a daemon, complete my setup using the
|
/* if I am a daemon, complete my setup using the
|
||||||
* default procedure
|
* default procedure
|
||||||
*/
|
*/
|
||||||
if (orte_process_info.daemon) {
|
if (orte_proc_info.daemon) {
|
||||||
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) {
|
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "orte_ess_base_orted_setup";
|
error = "orte_ess_base_orted_setup";
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
} else if (orte_process_info.tool) {
|
} else if (orte_proc_info.tool) {
|
||||||
/* otherwise, if I am a tool proc, use that procedure */
|
/* otherwise, if I am a tool proc, use that procedure */
|
||||||
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
|
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
@ -112,7 +112,7 @@ static int rte_init(char flags)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* setup the nidmap arrays */
|
/* setup the nidmap arrays */
|
||||||
if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_process_info.sync_buf))) {
|
if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_proc_info.sync_buf))) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "orte_util_nidmap_init";
|
error = "orte_util_nidmap_init";
|
||||||
goto error;
|
goto error;
|
||||||
@ -133,11 +133,11 @@ static int rte_finalize(void)
|
|||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
/* if I am a daemon, finalize using the default procedure */
|
/* if I am a daemon, finalize using the default procedure */
|
||||||
if (orte_process_info.daemon) {
|
if (orte_proc_info.daemon) {
|
||||||
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
|
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
}
|
}
|
||||||
} else if (orte_process_info.tool) {
|
} else if (orte_proc_info.tool) {
|
||||||
/* otherwise, if I am a tool proc, use that procedure */
|
/* otherwise, if I am a tool proc, use that procedure */
|
||||||
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) {
|
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
@ -357,7 +357,7 @@ static int alps_set_name(void)
|
|||||||
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
|
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
|
||||||
"ess:alps set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
"ess:alps set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||||
|
|
||||||
orte_process_info.num_procs = (orte_std_cntr_t) cnos_get_size();
|
orte_proc_info.num_procs = (orte_std_cntr_t) cnos_get_size();
|
||||||
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -45,7 +45,7 @@ int orte_ess_env_get(void)
|
|||||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||||
return ORTE_ERR_NOT_FOUND;
|
return ORTE_ERR_NOT_FOUND;
|
||||||
}
|
}
|
||||||
orte_process_info.num_procs = (orte_std_cntr_t)num_procs;
|
orte_proc_info.num_procs = (orte_std_cntr_t)num_procs;
|
||||||
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -123,12 +123,12 @@ int orte_ess_base_app_setup(void)
|
|||||||
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
|
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
|
||||||
"%s setting up session dir with\n\ttmpdir: %s\n\thost %s",
|
"%s setting up session dir with\n\ttmpdir: %s\n\thost %s",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
(NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base,
|
(NULL == orte_proc_info.tmpdir_base) ? "UNDEF" : orte_proc_info.tmpdir_base,
|
||||||
orte_process_info.nodename));
|
orte_proc_info.nodename));
|
||||||
|
|
||||||
if (ORTE_SUCCESS != (ret = orte_session_dir(true,
|
if (ORTE_SUCCESS != (ret = orte_session_dir(true,
|
||||||
orte_process_info.tmpdir_base,
|
orte_proc_info.tmpdir_base,
|
||||||
orte_process_info.nodename, NULL,
|
orte_proc_info.nodename, NULL,
|
||||||
ORTE_PROC_MY_NAME))) {
|
ORTE_PROC_MY_NAME))) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "orte_session_dir";
|
error = "orte_session_dir";
|
||||||
@ -138,7 +138,7 @@ int orte_ess_base_app_setup(void)
|
|||||||
/* Once the session directory location has been established, set
|
/* Once the session directory location has been established, set
|
||||||
the opal_output env file location to be in the
|
the opal_output env file location to be in the
|
||||||
proc-specific session directory. */
|
proc-specific session directory. */
|
||||||
opal_output_set_output_file_info(orte_process_info.proc_session_dir,
|
opal_output_set_output_file_info(orte_proc_info.proc_session_dir,
|
||||||
"output-", NULL, NULL);
|
"output-", NULL, NULL);
|
||||||
|
|
||||||
|
|
||||||
@ -164,7 +164,7 @@ int orte_ess_base_app_setup(void)
|
|||||||
error = "orte_snapc_base_open";
|
error = "orte_snapc_base_open";
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_process_info.hnp, !orte_process_info.daemon))) {
|
if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_proc_info.hnp, !orte_proc_info.daemon))) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "orte_snapc_base_select";
|
error = "orte_snapc_base_select";
|
||||||
goto error;
|
goto error;
|
||||||
@ -278,7 +278,7 @@ void orte_ess_base_app_abort(int status, bool report)
|
|||||||
* write an "abort" file into our session directory
|
* write an "abort" file into our session directory
|
||||||
*/
|
*/
|
||||||
if (report) {
|
if (report) {
|
||||||
abort_file = opal_os_path(false, orte_process_info.proc_session_dir, "abort", NULL);
|
abort_file = opal_os_path(false, orte_proc_info.proc_session_dir, "abort", NULL);
|
||||||
if (NULL == abort_file) {
|
if (NULL == abort_file) {
|
||||||
/* got a problem */
|
/* got a problem */
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||||
|
@ -187,12 +187,12 @@ int orte_ess_base_orted_setup(void)
|
|||||||
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
|
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
|
||||||
"%s setting up session dir with\n\ttmpdir: %s\n\thost %s",
|
"%s setting up session dir with\n\ttmpdir: %s\n\thost %s",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
(NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base,
|
(NULL == orte_proc_info.tmpdir_base) ? "UNDEF" : orte_proc_info.tmpdir_base,
|
||||||
orte_process_info.nodename));
|
orte_proc_info.nodename));
|
||||||
|
|
||||||
if (ORTE_SUCCESS != (ret = orte_session_dir(true,
|
if (ORTE_SUCCESS != (ret = orte_session_dir(true,
|
||||||
orte_process_info.tmpdir_base,
|
orte_proc_info.tmpdir_base,
|
||||||
orte_process_info.nodename, NULL,
|
orte_proc_info.nodename, NULL,
|
||||||
ORTE_PROC_MY_NAME))) {
|
ORTE_PROC_MY_NAME))) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "orte_session_dir";
|
error = "orte_session_dir";
|
||||||
@ -243,7 +243,7 @@ int orte_ess_base_orted_setup(void)
|
|||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_process_info.hnp, !orte_process_info.daemon))) {
|
if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_proc_info.hnp, !orte_proc_info.daemon))) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "orte_snapc_base_select";
|
error = "orte_snapc_base_select";
|
||||||
goto error;
|
goto error;
|
||||||
|
@ -98,9 +98,9 @@ int orte_ess_base_tool_setup(void)
|
|||||||
* this node might be located
|
* this node might be located
|
||||||
*/
|
*/
|
||||||
if (ORTE_SUCCESS != (ret = orte_session_dir_get_name(NULL,
|
if (ORTE_SUCCESS != (ret = orte_session_dir_get_name(NULL,
|
||||||
&orte_process_info.tmpdir_base,
|
&orte_proc_info.tmpdir_base,
|
||||||
&orte_process_info.top_session_dir,
|
&orte_proc_info.top_session_dir,
|
||||||
orte_process_info.nodename, NULL, NULL))) {
|
orte_proc_info.nodename, NULL, NULL))) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "define session dir names";
|
error = "define session dir names";
|
||||||
goto error;
|
goto error;
|
||||||
@ -136,7 +136,7 @@ int orte_ess_base_tool_setup(void)
|
|||||||
error = "orte_snapc_base_open";
|
error = "orte_snapc_base_open";
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_process_info.hnp, !orte_process_info.daemon))) {
|
if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_proc_info.hnp, !orte_proc_info.daemon))) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "orte_snapc_base_select";
|
error = "orte_snapc_base_select";
|
||||||
goto error;
|
goto error;
|
||||||
|
@ -79,14 +79,14 @@ static int rte_init(char flags)
|
|||||||
/* if I am a daemon, complete my setup using the
|
/* if I am a daemon, complete my setup using the
|
||||||
* default procedure
|
* default procedure
|
||||||
*/
|
*/
|
||||||
if (orte_process_info.daemon) {
|
if (orte_proc_info.daemon) {
|
||||||
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) {
|
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "orte_ess_base_orted_setup";
|
error = "orte_ess_base_orted_setup";
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if (orte_process_info.tool) {
|
} else if (orte_proc_info.tool) {
|
||||||
/* otherwise, if I am a tool proc, use that procedure */
|
/* otherwise, if I am a tool proc, use that procedure */
|
||||||
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
|
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
@ -109,7 +109,7 @@ static int rte_init(char flags)
|
|||||||
opal_pointer_array_init(&nidmap, 8, INT32_MAX, 8);
|
opal_pointer_array_init(&nidmap, 8, INT32_MAX, 8);
|
||||||
|
|
||||||
/* if one was provided, build my nidmap */
|
/* if one was provided, build my nidmap */
|
||||||
if (ORTE_SUCCESS != (ret = orte_ess_base_build_nidmap(orte_process_info.sync_buf,
|
if (ORTE_SUCCESS != (ret = orte_ess_base_build_nidmap(orte_proc_info.sync_buf,
|
||||||
&nidmap, &pmap, &nprocs))) {
|
&nidmap, &pmap, &nprocs))) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "orte_ess_base_build_nidmap";
|
error = "orte_ess_base_build_nidmap";
|
||||||
@ -134,11 +134,11 @@ static int rte_finalize(void)
|
|||||||
int32_t i;
|
int32_t i;
|
||||||
|
|
||||||
/* if I am a daemon, finalize using the default procedure */
|
/* if I am a daemon, finalize using the default procedure */
|
||||||
if (orte_process_info.daemon) {
|
if (orte_proc_info.daemon) {
|
||||||
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
|
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
}
|
}
|
||||||
} else if (orte_process_info.tool) {
|
} else if (orte_proc_info.tool) {
|
||||||
/* otherwise, if I am a tool proc, use that procedure */
|
/* otherwise, if I am a tool proc, use that procedure */
|
||||||
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) {
|
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
@ -371,10 +371,10 @@ static int bproc_set_name(void)
|
|||||||
ORTE_PROC_MY_NAME->vpid = vpid_start + (bproc_rank * stride);
|
ORTE_PROC_MY_NAME->vpid = vpid_start + (bproc_rank * stride);
|
||||||
|
|
||||||
|
|
||||||
if(NULL != orte_process_info.nodename) {
|
if(NULL != orte_proc_info.nodename) {
|
||||||
free(orte_process_info.nodename);
|
free(orte_proc_info.nodename);
|
||||||
}
|
}
|
||||||
asprintf(&orte_process_info.nodename, "%d", bproc_currnode());
|
asprintf(&orte_proc_info.nodename, "%d", bproc_currnode());
|
||||||
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -85,7 +85,7 @@ static int rte_init(char flags)
|
|||||||
ORTE_PROC_MY_NAME->vpid = (orte_vpid_t) cnos_get_rank();
|
ORTE_PROC_MY_NAME->vpid = (orte_vpid_t) cnos_get_rank();
|
||||||
|
|
||||||
/* Get the number of procs in the job from cnos */
|
/* Get the number of procs in the job from cnos */
|
||||||
orte_process_info.num_procs = (orte_std_cntr_t) cnos_get_size();
|
orte_proc_info.num_procs = (orte_std_cntr_t) cnos_get_size();
|
||||||
|
|
||||||
/* Get the nid map */
|
/* Get the nid map */
|
||||||
nprocs = cnos_get_nidpid_map(&map);
|
nprocs = cnos_get_nidpid_map(&map);
|
||||||
@ -146,7 +146,7 @@ static char* proc_get_hostname(orte_process_name_t *proc)
|
|||||||
static uint32_t proc_get_arch(orte_process_name_t *proc)
|
static uint32_t proc_get_arch(orte_process_name_t *proc)
|
||||||
{
|
{
|
||||||
/* always homogeneous, so other side is always same as us */
|
/* always homogeneous, so other side is always same as us */
|
||||||
return orte_process_info.arch;
|
return orte_proc_info.arch;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int update_arch(orte_process_name_t *proc, uint32_t arch)
|
static int update_arch(orte_process_name_t *proc, uint32_t arch)
|
||||||
|
2
orte/mca/ess/env/ess_env_component.c
поставляемый
2
orte/mca/ess/env/ess_env_component.c
поставляемый
@ -81,7 +81,7 @@ int orte_ess_env_component_query(mca_base_module_t **module, int *priority)
|
|||||||
* it would be impossible for the correct env vars
|
* it would be impossible for the correct env vars
|
||||||
* to have been set!
|
* to have been set!
|
||||||
*/
|
*/
|
||||||
if (NULL != orte_process_info.my_hnp_uri) {
|
if (NULL != orte_proc_info.my_hnp_uri) {
|
||||||
*priority = 20;
|
*priority = 20;
|
||||||
*module = (mca_base_module_t *)&orte_ess_env_module;
|
*module = (mca_base_module_t *)&orte_ess_env_module;
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
|
24
orte/mca/ess/env/ess_env_module.c
поставляемый
24
orte/mca/ess/env/ess_env_module.c
поставляемый
@ -136,14 +136,14 @@ static int rte_init(char flags)
|
|||||||
/* if I am a daemon, complete my setup using the
|
/* if I am a daemon, complete my setup using the
|
||||||
* default procedure
|
* default procedure
|
||||||
*/
|
*/
|
||||||
if (orte_process_info.daemon) {
|
if (orte_proc_info.daemon) {
|
||||||
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) {
|
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "orte_ess_base_orted_setup";
|
error = "orte_ess_base_orted_setup";
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if (orte_process_info.tool) {
|
} else if (orte_proc_info.tool) {
|
||||||
/* otherwise, if I am a tool proc, use that procedure */
|
/* otherwise, if I am a tool proc, use that procedure */
|
||||||
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
|
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
@ -165,7 +165,7 @@ static int rte_init(char flags)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* if one was provided, build my nidmap */
|
/* if one was provided, build my nidmap */
|
||||||
if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_process_info.sync_buf))) {
|
if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_proc_info.sync_buf))) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "orte_util_nidmap_init";
|
error = "orte_util_nidmap_init";
|
||||||
goto error;
|
goto error;
|
||||||
@ -186,11 +186,11 @@ static int rte_finalize(void)
|
|||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
/* if I am a daemon, finalize using the default procedure */
|
/* if I am a daemon, finalize using the default procedure */
|
||||||
if (orte_process_info.daemon) {
|
if (orte_proc_info.daemon) {
|
||||||
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
|
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
}
|
}
|
||||||
} else if (orte_process_info.tool) {
|
} else if (orte_proc_info.tool) {
|
||||||
/* otherwise, if I am a tool proc, use that procedure */
|
/* otherwise, if I am a tool proc, use that procedure */
|
||||||
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) {
|
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
@ -505,12 +505,12 @@ static int rte_ft_event(int state)
|
|||||||
* Restart the routed framework
|
* Restart the routed framework
|
||||||
* JJH: Lie to the finalize function so it does not try to contact the daemon.
|
* JJH: Lie to the finalize function so it does not try to contact the daemon.
|
||||||
*/
|
*/
|
||||||
orte_process_info.tool = true;
|
orte_proc_info.tool = true;
|
||||||
if (ORTE_SUCCESS != (ret = orte_routed.finalize()) ) {
|
if (ORTE_SUCCESS != (ret = orte_routed.finalize()) ) {
|
||||||
exit_status = ret;
|
exit_status = ret;
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
orte_process_info.tool = false;
|
orte_proc_info.tool = false;
|
||||||
if (ORTE_SUCCESS != (ret = orte_routed.initialize()) ) {
|
if (ORTE_SUCCESS != (ret = orte_routed.initialize()) ) {
|
||||||
exit_status = ret;
|
exit_status = ret;
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
@ -556,14 +556,14 @@ static int rte_ft_event(int state)
|
|||||||
* Session directory re-init
|
* Session directory re-init
|
||||||
*/
|
*/
|
||||||
if (ORTE_SUCCESS != (ret = orte_session_dir(true,
|
if (ORTE_SUCCESS != (ret = orte_session_dir(true,
|
||||||
orte_process_info.tmpdir_base,
|
orte_proc_info.tmpdir_base,
|
||||||
orte_process_info.nodename,
|
orte_proc_info.nodename,
|
||||||
NULL, /* Batch ID -- Not used */
|
NULL, /* Batch ID -- Not used */
|
||||||
ORTE_PROC_MY_NAME))) {
|
ORTE_PROC_MY_NAME))) {
|
||||||
exit_status = ret;
|
exit_status = ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
opal_output_set_output_file_info(orte_process_info.proc_session_dir,
|
opal_output_set_output_file_info(orte_proc_info.proc_session_dir,
|
||||||
"output-", NULL, NULL);
|
"output-", NULL, NULL);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -590,13 +590,13 @@ static int rte_ft_event(int state)
|
|||||||
* - Note: BLCR does this because it tries to preseve the PID
|
* - Note: BLCR does this because it tries to preseve the PID
|
||||||
* of the program across checkpointes
|
* of the program across checkpointes
|
||||||
*/
|
*/
|
||||||
if( ORTE_SUCCESS != (ret = ess_env_ft_event_update_process_info(orte_process_info.my_name, getpid())) ) {
|
if( ORTE_SUCCESS != (ret = ess_env_ft_event_update_process_info(orte_proc_info.my_name, getpid())) ) {
|
||||||
exit_status = ret;
|
exit_status = ret;
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* if one was provided, build my nidmap */
|
/* if one was provided, build my nidmap */
|
||||||
if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_process_info.sync_buf))) {
|
if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_proc_info.sync_buf))) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
exit_status = ret;
|
exit_status = ret;
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
|
@ -73,7 +73,7 @@ int orte_ess_hnp_component_query(mca_base_module_t **module, int *priority)
|
|||||||
/* we are the hnp module - we need to be selected
|
/* we are the hnp module - we need to be selected
|
||||||
* IFF we are designated as the hnp
|
* IFF we are designated as the hnp
|
||||||
*/
|
*/
|
||||||
if (orte_process_info.hnp) {
|
if (orte_proc_info.hnp) {
|
||||||
*priority = 100;
|
*priority = 100;
|
||||||
*module = (mca_base_module_t *)&orte_ess_hnp_module;
|
*module = (mca_base_module_t *)&orte_ess_hnp_module;
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
|
@ -287,12 +287,12 @@ static int rte_init(char flags)
|
|||||||
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
|
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
|
||||||
"%s setting up session dir with\n\ttmpdir: %s\n\thost %s",
|
"%s setting up session dir with\n\ttmpdir: %s\n\thost %s",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
(NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base,
|
(NULL == orte_proc_info.tmpdir_base) ? "UNDEF" : orte_proc_info.tmpdir_base,
|
||||||
orte_process_info.nodename));
|
orte_proc_info.nodename));
|
||||||
|
|
||||||
if (ORTE_SUCCESS != (ret = orte_session_dir(true,
|
if (ORTE_SUCCESS != (ret = orte_session_dir(true,
|
||||||
orte_process_info.tmpdir_base,
|
orte_proc_info.tmpdir_base,
|
||||||
orte_process_info.nodename, NULL,
|
orte_proc_info.nodename, NULL,
|
||||||
ORTE_PROC_MY_NAME))) {
|
ORTE_PROC_MY_NAME))) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "orte_session_dir";
|
error = "orte_session_dir";
|
||||||
@ -302,11 +302,11 @@ static int rte_init(char flags)
|
|||||||
/* Once the session directory location has been established, set
|
/* Once the session directory location has been established, set
|
||||||
the opal_output hnp file location to be in the
|
the opal_output hnp file location to be in the
|
||||||
proc-specific session directory. */
|
proc-specific session directory. */
|
||||||
opal_output_set_output_file_info(orte_process_info.proc_session_dir,
|
opal_output_set_output_file_info(orte_proc_info.proc_session_dir,
|
||||||
"output-", NULL, NULL);
|
"output-", NULL, NULL);
|
||||||
|
|
||||||
/* save my contact info in a file for others to find */
|
/* save my contact info in a file for others to find */
|
||||||
jobfam_dir = opal_dirname(orte_process_info.job_session_dir);
|
jobfam_dir = opal_dirname(orte_proc_info.job_session_dir);
|
||||||
contact_path = opal_os_path(false, jobfam_dir, "contact.txt", NULL);
|
contact_path = opal_os_path(false, jobfam_dir, "contact.txt", NULL);
|
||||||
free(jobfam_dir);
|
free(jobfam_dir);
|
||||||
|
|
||||||
@ -356,15 +356,15 @@ static int rte_init(char flags)
|
|||||||
|
|
||||||
/* create and store a node object where we are */
|
/* create and store a node object where we are */
|
||||||
node = OBJ_NEW(orte_node_t);
|
node = OBJ_NEW(orte_node_t);
|
||||||
node->name = strdup(orte_process_info.nodename);
|
node->name = strdup(orte_proc_info.nodename);
|
||||||
node->arch = orte_process_info.arch;
|
node->arch = orte_proc_info.arch;
|
||||||
node->index = opal_pointer_array_add(orte_node_pool, node);
|
node->index = opal_pointer_array_add(orte_node_pool, node);
|
||||||
|
|
||||||
/* create and store a proc object for us */
|
/* create and store a proc object for us */
|
||||||
proc = OBJ_NEW(orte_proc_t);
|
proc = OBJ_NEW(orte_proc_t);
|
||||||
proc->name.jobid = ORTE_PROC_MY_NAME->jobid;
|
proc->name.jobid = ORTE_PROC_MY_NAME->jobid;
|
||||||
proc->name.vpid = ORTE_PROC_MY_NAME->vpid;
|
proc->name.vpid = ORTE_PROC_MY_NAME->vpid;
|
||||||
proc->pid = orte_process_info.pid;
|
proc->pid = orte_proc_info.pid;
|
||||||
proc->rml_uri = orte_rml.get_contact_info();
|
proc->rml_uri = orte_rml.get_contact_info();
|
||||||
proc->state = ORTE_PROC_STATE_RUNNING;
|
proc->state = ORTE_PROC_STATE_RUNNING;
|
||||||
OBJ_RETAIN(node); /* keep accounting straight */
|
OBJ_RETAIN(node); /* keep accounting straight */
|
||||||
@ -431,7 +431,7 @@ static int rte_init(char flags)
|
|||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_process_info.hnp, !orte_process_info.daemon))) {
|
if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_proc_info.hnp, !orte_proc_info.daemon))) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "orte_snapc_base_select";
|
error = "orte_snapc_base_select";
|
||||||
goto error;
|
goto error;
|
||||||
@ -489,7 +489,7 @@ static int rte_finalize(void)
|
|||||||
int i;
|
int i;
|
||||||
|
|
||||||
/* remove my contact info file */
|
/* remove my contact info file */
|
||||||
contact_path = opal_os_path(false, orte_process_info.top_session_dir,
|
contact_path = opal_os_path(false, orte_proc_info.top_session_dir,
|
||||||
"contact.txt", NULL);
|
"contact.txt", NULL);
|
||||||
unlink(contact_path);
|
unlink(contact_path);
|
||||||
free(contact_path);
|
free(contact_path);
|
||||||
|
@ -72,7 +72,7 @@ int orte_ess_lsf_component_query(mca_base_module_t **module, int *priority)
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
if (NULL != getenv("LSB_JOBID") &&
|
if (NULL != getenv("LSB_JOBID") &&
|
||||||
NULL != orte_process_info.my_hnp_uri) {
|
NULL != orte_proc_info.my_hnp_uri) {
|
||||||
*priority = 40;
|
*priority = 40;
|
||||||
*module = (mca_base_module_t *)&orte_ess_lsf_module;
|
*module = (mca_base_module_t *)&orte_ess_lsf_module;
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
|
@ -96,13 +96,13 @@ static int rte_init(char flags)
|
|||||||
/* if I am a daemon, complete my setup using the
|
/* if I am a daemon, complete my setup using the
|
||||||
* default procedure
|
* default procedure
|
||||||
*/
|
*/
|
||||||
if (orte_process_info.daemon) {
|
if (orte_proc_info.daemon) {
|
||||||
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) {
|
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "orte_ess_base_orted_setup";
|
error = "orte_ess_base_orted_setup";
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
} else if (orte_process_info.tool) {
|
} else if (orte_proc_info.tool) {
|
||||||
/* otherwise, if I am a tool proc, use that procedure */
|
/* otherwise, if I am a tool proc, use that procedure */
|
||||||
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
|
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
@ -124,7 +124,7 @@ static int rte_init(char flags)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* setup the nidmap arrays */
|
/* setup the nidmap arrays */
|
||||||
if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_process_info.sync_buf))) {
|
if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_proc_info.sync_buf))) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "orte_util_nidmap_init";
|
error = "orte_util_nidmap_init";
|
||||||
goto error;
|
goto error;
|
||||||
@ -145,11 +145,11 @@ static int rte_finalize(void)
|
|||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
/* if I am a daemon, finalize using the default procedure */
|
/* if I am a daemon, finalize using the default procedure */
|
||||||
if (orte_process_info.daemon) {
|
if (orte_proc_info.daemon) {
|
||||||
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
|
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
}
|
}
|
||||||
} else if (orte_process_info.tool) {
|
} else if (orte_proc_info.tool) {
|
||||||
/* otherwise, if I am a tool proc, use that procedure */
|
/* otherwise, if I am a tool proc, use that procedure */
|
||||||
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) {
|
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
|
@ -101,7 +101,7 @@ static int rte_init(char flags)
|
|||||||
*/
|
*/
|
||||||
/* split the nidmap string */
|
/* split the nidmap string */
|
||||||
nidmap = opal_argv_split(nidmap_string, ':');
|
nidmap = opal_argv_split(nidmap_string, ':');
|
||||||
orte_process_info.num_procs = (orte_std_cntr_t) opal_argv_count(nidmap);
|
orte_proc_info.num_procs = (orte_std_cntr_t) opal_argv_count(nidmap);
|
||||||
|
|
||||||
/* MPI_Init needs the grpcomm framework, so we have to init it */
|
/* MPI_Init needs the grpcomm framework, so we have to init it */
|
||||||
if (ORTE_SUCCESS != (rc = orte_grpcomm_base_open())) {
|
if (ORTE_SUCCESS != (rc = orte_grpcomm_base_open())) {
|
||||||
@ -156,7 +156,7 @@ static char* proc_get_hostname(orte_process_name_t *proc)
|
|||||||
|
|
||||||
static uint32_t proc_get_arch(orte_process_name_t *proc)
|
static uint32_t proc_get_arch(orte_process_name_t *proc)
|
||||||
{
|
{
|
||||||
return orte_process_info.arch;
|
return orte_proc_info.arch;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int update_arch(orte_process_name_t *proc, uint32_t arch)
|
static int update_arch(orte_process_name_t *proc, uint32_t arch)
|
||||||
|
@ -73,9 +73,9 @@ int orte_ess_singleton_component_query(mca_base_module_t **module, int *priority
|
|||||||
/* if we are an HNP, daemon, or tool, then we
|
/* if we are an HNP, daemon, or tool, then we
|
||||||
* are definitely not a singleton!
|
* are definitely not a singleton!
|
||||||
*/
|
*/
|
||||||
if (orte_process_info.hnp ||
|
if (orte_proc_info.hnp ||
|
||||||
orte_process_info.daemon ||
|
orte_proc_info.daemon ||
|
||||||
orte_process_info.tool) {
|
orte_proc_info.tool) {
|
||||||
*module = NULL;
|
*module = NULL;
|
||||||
return ORTE_ERROR;
|
return ORTE_ERROR;
|
||||||
}
|
}
|
||||||
@ -85,7 +85,7 @@ int orte_ess_singleton_component_query(mca_base_module_t **module, int *priority
|
|||||||
* given an HNP URI, then we are definitely
|
* given an HNP URI, then we are definitely
|
||||||
* not a singleton
|
* not a singleton
|
||||||
*/
|
*/
|
||||||
if (NULL != orte_process_info.my_hnp_uri) {
|
if (NULL != orte_proc_info.my_hnp_uri) {
|
||||||
*module = NULL;
|
*module = NULL;
|
||||||
return ORTE_ERROR;
|
return ORTE_ERROR;
|
||||||
}
|
}
|
||||||
|
@ -144,7 +144,7 @@ static int rte_init(char flags)
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
orte_process_info.num_procs = 1;
|
orte_proc_info.num_procs = 1;
|
||||||
|
|
||||||
/* NOTE: do not wireup our io - let the fork'd orted serve
|
/* NOTE: do not wireup our io - let the fork'd orted serve
|
||||||
* as our io handler. This prevents issues with the event
|
* as our io handler. This prevents issues with the event
|
||||||
@ -275,8 +275,8 @@ static int fork_hnp(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Fork off the child */
|
/* Fork off the child */
|
||||||
orte_process_info.hnp_pid = fork();
|
orte_proc_info.hnp_pid = fork();
|
||||||
if(orte_process_info.hnp_pid < 0) {
|
if(orte_proc_info.hnp_pid < 0) {
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_CHILDREN);
|
ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_CHILDREN);
|
||||||
close(p[0]);
|
close(p[0]);
|
||||||
close(p[1]);
|
close(p[1]);
|
||||||
@ -286,7 +286,7 @@ static int fork_hnp(void)
|
|||||||
return ORTE_ERR_SYS_LIMITS_CHILDREN;
|
return ORTE_ERR_SYS_LIMITS_CHILDREN;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (orte_process_info.hnp_pid == 0) {
|
if (orte_proc_info.hnp_pid == 0) {
|
||||||
close(p[0]);
|
close(p[0]);
|
||||||
close(death_pipe[1]);
|
close(death_pipe[1]);
|
||||||
/* I am the child - exec me */
|
/* I am the child - exec me */
|
||||||
@ -368,13 +368,13 @@ static int fork_hnp(void)
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
/* save the daemon uri - we will process it later */
|
/* save the daemon uri - we will process it later */
|
||||||
orte_process_info.my_daemon_uri = strdup(orted_uri);
|
orte_proc_info.my_daemon_uri = strdup(orted_uri);
|
||||||
|
|
||||||
/* likewise, since this is also the HNP, set that uri too */
|
/* likewise, since this is also the HNP, set that uri too */
|
||||||
orte_process_info.my_hnp_uri = strdup(orted_uri);
|
orte_proc_info.my_hnp_uri = strdup(orted_uri);
|
||||||
|
|
||||||
/* indicate we are a singleton so orte_init knows what to do */
|
/* indicate we are a singleton so orte_init knows what to do */
|
||||||
orte_process_info.singleton = true;
|
orte_proc_info.singleton = true;
|
||||||
/* all done - report success */
|
/* all done - report success */
|
||||||
free(orted_uri);
|
free(orted_uri);
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
|
@ -209,7 +209,7 @@ static char* proc_get_hostname(orte_process_name_t *proc)
|
|||||||
/* if it is me, the answer is my nodename */
|
/* if it is me, the answer is my nodename */
|
||||||
if (proc->jobid == ORTE_PROC_MY_NAME->jobid &&
|
if (proc->jobid == ORTE_PROC_MY_NAME->jobid &&
|
||||||
proc->vpid == ORTE_PROC_MY_NAME->vpid) {
|
proc->vpid == ORTE_PROC_MY_NAME->vpid) {
|
||||||
return orte_process_info.nodename;
|
return orte_proc_info.nodename;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* otherwise, no idea */
|
/* otherwise, no idea */
|
||||||
@ -221,7 +221,7 @@ static uint32_t proc_get_arch(orte_process_name_t *proc)
|
|||||||
/* if it is me, the answer is my arch */
|
/* if it is me, the answer is my arch */
|
||||||
if (proc->jobid == ORTE_PROC_MY_NAME->jobid &&
|
if (proc->jobid == ORTE_PROC_MY_NAME->jobid &&
|
||||||
proc->vpid == ORTE_PROC_MY_NAME->vpid) {
|
proc->vpid == ORTE_PROC_MY_NAME->vpid) {
|
||||||
return orte_process_info.arch;
|
return orte_proc_info.arch;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* otherwise, no idea */
|
/* otherwise, no idea */
|
||||||
@ -401,12 +401,12 @@ static int rte_ft_event(int state)
|
|||||||
* Restart the routed framework
|
* Restart the routed framework
|
||||||
* JJH: Lie to the finalize function so it does not try to contact the daemon.
|
* JJH: Lie to the finalize function so it does not try to contact the daemon.
|
||||||
*/
|
*/
|
||||||
orte_process_info.tool = true;
|
orte_proc_info.tool = true;
|
||||||
if (ORTE_SUCCESS != (ret = orte_routed.finalize()) ) {
|
if (ORTE_SUCCESS != (ret = orte_routed.finalize()) ) {
|
||||||
exit_status = ret;
|
exit_status = ret;
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
orte_process_info.tool = false;
|
orte_proc_info.tool = false;
|
||||||
if (ORTE_SUCCESS != (ret = orte_routed.initialize()) ) {
|
if (ORTE_SUCCESS != (ret = orte_routed.initialize()) ) {
|
||||||
exit_status = ret;
|
exit_status = ret;
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
@ -452,14 +452,14 @@ static int rte_ft_event(int state)
|
|||||||
* Session directory re-init
|
* Session directory re-init
|
||||||
*/
|
*/
|
||||||
if (ORTE_SUCCESS != (ret = orte_session_dir(true,
|
if (ORTE_SUCCESS != (ret = orte_session_dir(true,
|
||||||
orte_process_info.tmpdir_base,
|
orte_proc_info.tmpdir_base,
|
||||||
orte_process_info.nodename,
|
orte_proc_info.nodename,
|
||||||
NULL, /* Batch ID -- Not used */
|
NULL, /* Batch ID -- Not used */
|
||||||
ORTE_PROC_MY_NAME))) {
|
ORTE_PROC_MY_NAME))) {
|
||||||
exit_status = ret;
|
exit_status = ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
opal_output_set_output_file_info(orte_process_info.proc_session_dir,
|
opal_output_set_output_file_info(orte_proc_info.proc_session_dir,
|
||||||
"output-", NULL, NULL);
|
"output-", NULL, NULL);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -486,13 +486,13 @@ static int rte_ft_event(int state)
|
|||||||
* - Note: BLCR does this because it tries to preseve the PID
|
* - Note: BLCR does this because it tries to preseve the PID
|
||||||
* of the program across checkpointes
|
* of the program across checkpointes
|
||||||
*/
|
*/
|
||||||
if( ORTE_SUCCESS != (ret = ess_slave_ft_event_update_process_info(orte_process_info.my_name, getpid())) ) {
|
if( ORTE_SUCCESS != (ret = ess_slave_ft_event_update_process_info(orte_proc_info.my_name, getpid())) ) {
|
||||||
exit_status = ret;
|
exit_status = ret;
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* if one was provided, build my nidmap */
|
/* if one was provided, build my nidmap */
|
||||||
if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_process_info.sync_buf))) {
|
if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_proc_info.sync_buf))) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
exit_status = ret;
|
exit_status = ret;
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
|
@ -76,7 +76,7 @@ int orte_ess_slurm_component_query(mca_base_module_t **module, int *priority)
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
if (NULL != getenv("SLURM_JOBID") &&
|
if (NULL != getenv("SLURM_JOBID") &&
|
||||||
NULL != orte_process_info.my_hnp_uri) {
|
NULL != orte_proc_info.my_hnp_uri) {
|
||||||
*priority = 30;
|
*priority = 30;
|
||||||
*module = (mca_base_module_t *)&orte_ess_slurm_module;
|
*module = (mca_base_module_t *)&orte_ess_slurm_module;
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
|
@ -108,7 +108,7 @@ static int rte_init(char flags)
|
|||||||
/* if I am a daemon, complete my setup using the
|
/* if I am a daemon, complete my setup using the
|
||||||
* default procedure
|
* default procedure
|
||||||
*/
|
*/
|
||||||
if (orte_process_info.daemon) {
|
if (orte_proc_info.daemon) {
|
||||||
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) {
|
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "orte_ess_base_orted_setup";
|
error = "orte_ess_base_orted_setup";
|
||||||
@ -140,7 +140,7 @@ static int rte_init(char flags)
|
|||||||
}
|
}
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
} else if (orte_process_info.tool) {
|
} else if (orte_proc_info.tool) {
|
||||||
/* otherwise, if I am a tool proc, use that procedure */
|
/* otherwise, if I am a tool proc, use that procedure */
|
||||||
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
|
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
@ -162,7 +162,7 @@ static int rte_init(char flags)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* setup the nidmap arrays */
|
/* setup the nidmap arrays */
|
||||||
if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_process_info.sync_buf))) {
|
if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_proc_info.sync_buf))) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "orte_util_nidmap_init";
|
error = "orte_util_nidmap_init";
|
||||||
goto error;
|
goto error;
|
||||||
@ -183,11 +183,11 @@ static int rte_finalize(void)
|
|||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
/* if I am a daemon, finalize using the default procedure */
|
/* if I am a daemon, finalize using the default procedure */
|
||||||
if (orte_process_info.daemon) {
|
if (orte_proc_info.daemon) {
|
||||||
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
|
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
}
|
}
|
||||||
} else if (orte_process_info.tool) {
|
} else if (orte_proc_info.tool) {
|
||||||
/* otherwise, if I am a tool proc, use that procedure */
|
/* otherwise, if I am a tool proc, use that procedure */
|
||||||
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) {
|
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
@ -420,15 +420,15 @@ static int slurm_set_name(void)
|
|||||||
"ess:slurm set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
"ess:slurm set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||||
|
|
||||||
/* fix up the system info nodename to match exactly what slurm returned */
|
/* fix up the system info nodename to match exactly what slurm returned */
|
||||||
if (NULL != orte_process_info.nodename) {
|
if (NULL != orte_proc_info.nodename) {
|
||||||
free(orte_process_info.nodename);
|
free(orte_proc_info.nodename);
|
||||||
}
|
}
|
||||||
orte_process_info.nodename = get_slurm_nodename(slurm_nodeid);
|
orte_proc_info.nodename = get_slurm_nodename(slurm_nodeid);
|
||||||
|
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
|
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
|
||||||
"ess:slurm set nodename to %s",
|
"ess:slurm set nodename to %s",
|
||||||
orte_process_info.nodename));
|
orte_proc_info.nodename));
|
||||||
|
|
||||||
/* get the non-name common environmental variables */
|
/* get the non-name common environmental variables */
|
||||||
if (ORTE_SUCCESS != (rc = orte_ess_env_get())) {
|
if (ORTE_SUCCESS != (rc = orte_ess_env_get())) {
|
||||||
@ -554,7 +554,7 @@ static int build_daemon_nidmap(void)
|
|||||||
/* construct the URI */
|
/* construct the URI */
|
||||||
proc.vpid = node->daemon;
|
proc.vpid = node->daemon;
|
||||||
orte_util_convert_process_name_to_string(&proc_name, &proc);
|
orte_util_convert_process_name_to_string(&proc_name, &proc);
|
||||||
asprintf(&uri, "%s;tcp://%s:%d", proc_name, addr, (int)orte_process_info.my_port);
|
asprintf(&uri, "%s;tcp://%s:%d", proc_name, addr, (int)orte_proc_info.my_port);
|
||||||
opal_output(0, "contact info %s", uri);
|
opal_output(0, "contact info %s", uri);
|
||||||
opal_dss.pack(&buf, &uri, 1, OPAL_STRING);
|
opal_dss.pack(&buf, &uri, 1, OPAL_STRING);
|
||||||
free(proc_name);
|
free(proc_name);
|
||||||
|
@ -74,10 +74,10 @@ int orte_ess_slurmd_component_query(mca_base_module_t **module, int *priority)
|
|||||||
* by mpirun but are in a slurm world
|
* by mpirun but are in a slurm world
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (orte_process_info.mpi_proc &&
|
if (orte_proc_info.mpi_proc &&
|
||||||
NULL != getenv("SLURM_JOBID") &&
|
NULL != getenv("SLURM_JOBID") &&
|
||||||
NULL != getenv("SLURM_STEPID") &&
|
NULL != getenv("SLURM_STEPID") &&
|
||||||
NULL == orte_process_info.my_hnp_uri) {
|
NULL == orte_proc_info.my_hnp_uri) {
|
||||||
*priority = 30;
|
*priority = 30;
|
||||||
*module = (mca_base_module_t *)&orte_ess_slurmd_module;
|
*module = (mca_base_module_t *)&orte_ess_slurmd_module;
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
|
@ -168,7 +168,7 @@ static int rte_init(char flags)
|
|||||||
error = "could not get SLURM_STEP_NUM_TASKS";
|
error = "could not get SLURM_STEP_NUM_TASKS";
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
orte_process_info.num_procs = strtol(envar, NULL, 10);
|
orte_proc_info.num_procs = strtol(envar, NULL, 10);
|
||||||
|
|
||||||
/* get my local nodeid */
|
/* get my local nodeid */
|
||||||
if (NULL == (envar = getenv("SLURM_NODEID"))) {
|
if (NULL == (envar = getenv("SLURM_NODEID"))) {
|
||||||
@ -207,7 +207,7 @@ static int rte_init(char flags)
|
|||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
num_nodes = opal_argv_count(nodes);
|
num_nodes = opal_argv_count(nodes);
|
||||||
orte_process_info.num_nodes = num_nodes;
|
orte_proc_info.num_nodes = num_nodes;
|
||||||
|
|
||||||
/* compute the ppn */
|
/* compute the ppn */
|
||||||
if (ORTE_SUCCESS != (ret = orte_regex_extract_ppn(num_nodes, tasks_per_node, &ppn))) {
|
if (ORTE_SUCCESS != (ret = orte_regex_extract_ppn(num_nodes, tasks_per_node, &ppn))) {
|
||||||
@ -245,7 +245,7 @@ static int rte_init(char flags)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* set the size of the nidmap storage so we minimize realloc's */
|
/* set the size of the nidmap storage so we minimize realloc's */
|
||||||
if (ORTE_SUCCESS != (ret = opal_pointer_array_set_size(&orte_nidmap, orte_process_info.num_nodes))) {
|
if (ORTE_SUCCESS != (ret = opal_pointer_array_set_size(&orte_nidmap, orte_proc_info.num_nodes))) {
|
||||||
error = "could not set pointer array size for nidmap";
|
error = "could not set pointer array size for nidmap";
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
@ -264,7 +264,7 @@ static int rte_init(char flags)
|
|||||||
jmap->job = ORTE_PROC_MY_NAME->jobid;
|
jmap->job = ORTE_PROC_MY_NAME->jobid;
|
||||||
opal_pointer_array_add(&orte_jobmap, jmap);
|
opal_pointer_array_add(&orte_jobmap, jmap);
|
||||||
/* update the num procs */
|
/* update the num procs */
|
||||||
jmap->num_procs = orte_process_info.num_procs;
|
jmap->num_procs = orte_proc_info.num_procs;
|
||||||
/* set the size of the pidmap storage so we minimize realloc's */
|
/* set the size of the pidmap storage so we minimize realloc's */
|
||||||
if (ORTE_SUCCESS != (ret = opal_pointer_array_set_size(&jmap->pmap, jmap->num_procs))) {
|
if (ORTE_SUCCESS != (ret = opal_pointer_array_set_size(&jmap->pmap, jmap->num_procs))) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
@ -301,8 +301,8 @@ static int rte_init(char flags)
|
|||||||
} else if (cyclic) {
|
} else if (cyclic) {
|
||||||
/* cycle across the nodes */
|
/* cycle across the nodes */
|
||||||
vpid = 0;
|
vpid = 0;
|
||||||
while (vpid < orte_process_info.num_procs) {
|
while (vpid < orte_proc_info.num_procs) {
|
||||||
for (i=0; i < num_nodes && vpid < orte_process_info.num_procs; i++) {
|
for (i=0; i < num_nodes && vpid < orte_proc_info.num_procs; i++) {
|
||||||
if (0 < ppn[i]) {
|
if (0 < ppn[i]) {
|
||||||
node = (orte_nid_t*)orte_nidmap.addr[i];
|
node = (orte_nid_t*)orte_nidmap.addr[i];
|
||||||
pmap = OBJ_NEW(orte_pmap_t);
|
pmap = OBJ_NEW(orte_pmap_t);
|
||||||
|
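The cyclic branch in the hunk above walks the nodes round-robin until orte_proc_info.num_procs ranks have been placed. A stand-alone sketch of that placement loop; the helper is hypothetical, and ppn[i] is assumed to hold the remaining slots on node i as in the hunk:

/* Illustrative sketch only - not code from this commit. */
static void place_cyclic(int num_nodes, int *ppn, int num_procs)
{
    int vpid = 0;
    while (vpid < num_procs) {
        int placed = 0;
        int i;
        for (i = 0; i < num_nodes && vpid < num_procs; i++) {
            if (0 < ppn[i]) {
                /* rank 'vpid' is mapped to node 'i' */
                ppn[i]--;
                vpid++;
                placed++;
            }
        }
        if (0 == placed) {
            break;  /* no slots left anywhere - avoid spinning forever */
        }
    }
}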
@@ -74,7 +74,7 @@ int orte_ess_tool_component_query(mca_base_module_t **module, int *priority)
 * precedence. This would happen, for example,
 * if the tool is a distributed set of processes
 */
-if (orte_process_info.tool) {
+if (orte_proc_info.tool) {
 *priority = 10;
 *module = (mca_base_module_t *)&orte_ess_tool_module;
 return ORTE_SUCCESS;
@@ -225,7 +225,7 @@ int orte_filem_base_get_proc_node_name(orte_process_name_t *proc, char **machine
 /* set default answer */
 *machine_name = NULL;

-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 /* if I am the HNP, then all the data structures are local to me - no
 * need to send messages around to get the info
 */
@@ -68,7 +68,7 @@ int orte_filem_base_comm_start(void)
 int rc;

 /* Only active in HNP and daemons */
-if( !orte_process_info.hnp && !orte_process_info.daemon ) {
+if( !orte_proc_info.hnp && !orte_proc_info.daemon ) {
 return ORTE_SUCCESS;
 }
 if ( recv_issued ) {
@@ -98,7 +98,7 @@ int orte_filem_base_comm_stop(void)
 int rc;

 /* Only active in HNP and daemons */
-if( !orte_process_info.hnp && !orte_process_info.daemon ) {
+if( !orte_proc_info.hnp && !orte_proc_info.daemon ) {
 return ORTE_SUCCESS;
 }
 if ( recv_issued ) {
@@ -622,7 +622,7 @@ static int orte_filem_rsh_start_copy(orte_filem_base_request_t *request) {
 f_set->remote_target));
 orte_show_help("help-orte-filem-rsh.txt",
 "orte-filem-rsh:get-file-not-exist",
-true, f_set->local_target, orte_process_info.nodename);
+true, f_set->local_target, orte_proc_info.nodename);
 request->is_done[cur_index] = true;
 request->is_active[cur_index] = true;
 request->exit_status[cur_index] = -1;
@@ -645,7 +645,7 @@ static int orte_filem_rsh_start_copy(orte_filem_base_request_t *request) {
 f_set->local_target));
 orte_show_help("help-orte-filem-rsh.txt",
 "orte-filem-rsh:get-file-exists",
-true, f_set->local_target, orte_process_info.nodename);
+true, f_set->local_target, orte_proc_info.nodename);
 request->is_done[cur_index] = true;
 request->is_active[cur_index] = true;
 request->exit_status[cur_index] = -1;
@@ -88,7 +88,7 @@ static int init(void)
 /* if we are a daemon or the hnp, we need to post a
 * recv to catch any collective operations
 */
-if (orte_process_info.daemon || orte_process_info.hnp) {
+if (orte_proc_info.daemon || orte_proc_info.hnp) {
 if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
 ORTE_RML_TAG_DAEMON_COLLECTIVE,
 ORTE_RML_NON_PERSISTENT,
@@ -111,7 +111,7 @@ static void finalize(void)
 /* if we are a daemon or the hnp, we need to cancel the
 * recv we posted
 */
-if (orte_process_info.daemon || orte_process_info.hnp) {
+if (orte_proc_info.daemon || orte_proc_info.hnp) {
 orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON_COLLECTIVE);
 }
 }
@@ -203,7 +203,7 @@ static int xcast(orte_jobid_t job,
 * fire right away, but that's okay
 * The macro makes a copy of the buffer, so it's okay to release it here
 */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &buf, ORTE_RML_TAG_DAEMON, orte_daemon_cmd_processor);
 } else {
 /* otherwise, send it to the HNP for relay */
@@ -542,7 +542,7 @@ static int daemon_collective(orte_process_name_t *sender, opal_buffer_t *data)

 if (jobdat->num_collected == jobdat->num_participating) {
 /* if I am the HNP, go process the results */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 goto hnp_process;
 }

@@ -79,7 +79,7 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
 }

 /* pack our hostname */
-if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &orte_process_info.nodename, 1, OPAL_STRING))) {
+if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &orte_proc_info.nodename, 1, OPAL_STRING))) {
 ORTE_ERROR_LOG(rc);
 goto cleanup;
 }
@@ -91,7 +91,7 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
 }

 /* pack our arch */
-if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &orte_process_info.arch, 1, OPAL_UINT32))) {
+if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &orte_proc_info.arch, 1, OPAL_UINT32))) {
 ORTE_ERROR_LOG(rc);
 goto cleanup;
 }
@@ -350,7 +350,7 @@ int orte_grpcomm_base_peer_modex(bool modex_db)
 goto cleanup;
 }

-if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &orte_process_info.arch, 1, OPAL_UINT32))) {
+if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &orte_proc_info.arch, 1, OPAL_UINT32))) {
 ORTE_ERROR_LOG(rc);
 goto cleanup;
 }
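The grpcomm hunks above seed the modex buffer with the local process identity: the nodename as a string and the arch as a uint32, via opal_dss.pack. A compact sketch of that seeding step under an assumed helper name; the pack calls themselves mirror the diff:

/* Hypothetical helper, not part of the patch - shows the pack sequence only.
 * Assumes the usual orte/opal headers are included. */
static int pack_local_identity(opal_buffer_t *buf)
{
    int rc;

    /* nodename first, then arch, matching the order used above */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &orte_proc_info.nodename,
                                            1, OPAL_STRING))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &orte_proc_info.arch,
                                            1, OPAL_UINT32))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    return ORTE_SUCCESS;
}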
@ -104,17 +104,17 @@ static int init(void)
 ORTE_ERROR_LOG(rc);
 }

-if (opal_profile && orte_process_info.mpi_proc) {
+if (opal_profile && orte_proc_info.mpi_proc) {
 /* if I am an MPI application proc, then create a buffer
 * to pack all my attributes in */
 profile_buf = OBJ_NEW(opal_buffer_t);
 /* seed it with the node name */
-if (ORTE_SUCCESS != (rc = opal_dss.pack(profile_buf, &orte_process_info.nodename, 1, OPAL_STRING))) {
+if (ORTE_SUCCESS != (rc = opal_dss.pack(profile_buf, &orte_proc_info.nodename, 1, OPAL_STRING))) {
 ORTE_ERROR_LOG(rc);
 }
 }

-if (orte_process_info.hnp && recv_on) {
+if (orte_proc_info.hnp && recv_on) {
 /* open the profile file for writing */
 if (NULL == opal_profile_file) {
 /* no file specified - we will just ignore any incoming data */
@ -140,7 +140,7 @@ static int init(void)
 /* if we are a daemon or the hnp, we need to post a
 * recv to catch any collective operations
 */
-if (orte_process_info.daemon || orte_process_info.hnp) {
+if (orte_proc_info.daemon || orte_proc_info.hnp) {
 if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
 ORTE_RML_TAG_DAEMON_COLLECTIVE,
 ORTE_RML_NON_PERSISTENT,
@ -163,7 +163,7 @@ static void finalize(void)

 orte_grpcomm_base_modex_finalize();

-if (opal_profile && orte_process_info.mpi_proc) {
+if (opal_profile && orte_proc_info.mpi_proc) {
 /* if I am an MPI proc, send my buffer to the collector */
 boptr = &bo;
 opal_dss.unload(profile_buf, (void**)&boptr->bytes, &boptr->size);
@ -177,7 +177,7 @@ static void finalize(void)
 OBJ_DESTRUCT(&profile);
 }

-if (orte_process_info.hnp && recv_on) {
+if (orte_proc_info.hnp && recv_on) {
 /* if we are profiling and I am the HNP, then stop the
 * profiling receive
 */
@ -191,7 +191,7 @@ static void finalize(void)
 /* if we are a daemon or the hnp, we need to cancel the
 * recv we posted
 */
-if (orte_process_info.daemon || orte_process_info.hnp) {
+if (orte_proc_info.daemon || orte_proc_info.hnp) {
 orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON_COLLECTIVE);
 }
 }
@ -283,7 +283,7 @@ static int xcast(orte_jobid_t job,
 * fire right away, but that's okay
 * The macro makes a copy of the buffer, so it's okay to release it here
 */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &buf, ORTE_RML_TAG_DAEMON, orte_daemon_cmd_processor);
 } else {
 /* otherwise, send it to the HNP for relay */
@ -930,7 +930,7 @@ static int daemon_collective(orte_process_name_t *sender, opal_buffer_t *data)

 if (jobdat->num_collected == jobdat->num_participating) {
 /* if I am the HNP, go process the results */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 goto hnp_process;
 }

@ -209,7 +209,7 @@ static int xcast(orte_jobid_t job,
 * fire right away, but that's okay
 * The macro makes a copy of the buffer, so it's okay to release it here
 */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &buf, ORTE_RML_TAG_DAEMON, orte_daemon_cmd_processor);
 } else {
 /* otherwise, send it to the HNP for relay */
@ -317,13 +317,13 @@ static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf)
 */
 if (0 == my_local_rank) {
 /* we need one entry/node in this job */
-my_coll_peers = (orte_vpid_t*)malloc(orte_process_info.num_nodes * sizeof(orte_vpid_t));
+my_coll_peers = (orte_vpid_t*)malloc(orte_proc_info.num_nodes * sizeof(orte_vpid_t));
 cpeers = 0;
 }

 /* cycle through the procs to create a list of those that are local to me */
 proc.jobid = ORTE_PROC_MY_NAME->jobid;
-for (v=0; v < orte_process_info.num_procs; v++) {
+for (v=0; v < orte_proc_info.num_procs; v++) {
 proc.vpid = v;
 /* is this proc local_rank=0 on its node? */
 if (0 == my_local_rank && 0 == orte_ess.get_local_rank(&proc)) {
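The allgather hunk above sizes a per-node array and then walks every vpid in the job looking for the procs whose local rank is 0. A rough sketch of that leader-collection loop follows; the helper name is illustrative and the ORTE headers (roughly orte/util/proc_info.h, orte/mca/ess/ess.h, orte/runtime/orte_globals.h) are assumed rather than quoted from the patch:

#include <stdlib.h>

/* collect the vpid of each proc that is local_rank 0 on its node;
 * returns the count, or -1 on allocation failure */
static int collect_node_leaders(orte_vpid_t **peers)
{
    orte_process_name_t proc;
    orte_vpid_t v, *list;
    int cnt = 0;

    /* one entry per node in this job, as in the hunk above */
    list = (orte_vpid_t*)malloc(orte_proc_info.num_nodes * sizeof(orte_vpid_t));
    if (NULL == list) {
        return -1;
    }
    proc.jobid = ORTE_PROC_MY_NAME->jobid;
    for (v = 0; v < orte_proc_info.num_procs; v++) {
        proc.vpid = v;
        if (0 == orte_ess.get_local_rank(&proc)) {
            list[cnt++] = v;   /* this proc is local_rank 0 on its node */
        }
    }
    *peers = list;
    return cnt;
}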
@ -47,7 +47,7 @@ int orte_iof_base_close(void)
 OBJ_DESTRUCT(&orte_iof_base.iof_components_opened);

 OPAL_THREAD_LOCK(&orte_iof_base.iof_write_output_lock);
-if (!orte_process_info.daemon) {
+if (!orte_proc_info.daemon) {
 /* check if anything is still trying to be written out */
 wev = orte_iof_base.iof_write_stdout->wev;
 if (!opal_list_is_empty(&wev->outputs)) {
@ -192,7 +192,7 @@ int orte_iof_base_open(void)
 }

 /* daemons do not need to do this as they do not write out stdout/err */
-if (!orte_process_info.daemon) {
+if (!orte_proc_info.daemon) {
 /* setup the stdout event */
 ORTE_IOF_SINK_DEFINE(&orte_iof_base.iof_write_stdout, ORTE_PROC_MY_NAME,
 1, ORTE_IOF_STDOUT, orte_iof_base_write_handler, NULL);
@ -131,7 +131,7 @@ static int orte_iof_hnp_query(mca_base_module_t **module, int *priority)
 *priority = -1;

 /* if we are not the HNP, then don't use this module */
-if (!orte_process_info.hnp) {
+if (!orte_proc_info.hnp) {
 return ORTE_ERROR;
 }

@ -115,7 +115,7 @@ static int orte_iof_orted_query(mca_base_module_t **module, int *priority)
 *priority = -1;

 /* if we are not a daemon, then don't use this module */
-if (!orte_process_info.daemon) {
+if (!orte_proc_info.daemon) {
 return ORTE_ERROR;
 }

@ -106,7 +106,7 @@ static int orte_iof_tool_query(mca_base_module_t **module, int *priority)
 *priority = -1;

 /* if we are not a tool, then don't use this module */
-if (!orte_process_info.tool) {
+if (!orte_proc_info.tool) {
 return ORTE_ERROR;
 }

@ -119,7 +119,7 @@ static void mypeerlog(int severity, int errcode, orte_process_name_t *peer_proc,
 peer_name ? peer_name : "UNKNOWN",
 peer_host ? peer_host : "UNKNOWN",
 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
-orte_process_info.nodename);
+orte_proc_info.nodename);
 space -= len;
 pos += len;

@ -662,7 +662,7 @@ REPORT_ERROR:
 /* if we are the HNP, then we would rather not send this to ourselves -
 * instead, we queue it up for local processing
 */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &alert,
 ORTE_RML_TAG_APP_LAUNCH_CALLBACK,
 orte_plm_base_app_report_launch);
@ -759,14 +759,14 @@ static int odls_base_default_setup_fork(orte_app_context_t *context,

 /* pass my contact info to the local proc so we can talk */
 param = mca_base_param_environ_variable("orte","local_daemon","uri");
-opal_setenv(param, orte_process_info.my_daemon_uri, true, environ_copy);
+opal_setenv(param, orte_proc_info.my_daemon_uri, true, environ_copy);
 free(param);

 /* pass the hnp's contact info to the local proc in case it
 * needs it
 */
 param = mca_base_param_environ_variable("orte","hnp","uri");
-opal_setenv(param, orte_process_info.my_hnp_uri, true, environ_copy);
+opal_setenv(param, orte_proc_info.my_hnp_uri, true, environ_copy);
 free(param);

 /* setup yield schedule - do not override any user-supplied directive! */
@ -1419,7 +1419,7 @@ CLEANUP:
 /* if we are the HNP, then we would rather not send this to ourselves -
 * instead, we queue it up for local processing
 */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &alert,
 ORTE_RML_TAG_APP_LAUNCH_CALLBACK,
 orte_plm_base_app_report_launch);
@ -1817,7 +1817,7 @@ int orte_odls_base_default_require_sync(orte_process_name_t *proc,
 /* if we are the HNP, then we would rather not send this to ourselves -
 * instead, we queue it up for local processing
 */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &buffer,
 ORTE_RML_TAG_INIT_ROUTES,
 orte_routed_base_process_msg);
@ -1923,7 +1923,7 @@ static void check_proc_complete(orte_odls_child_t *child)
 /* if we are the HNP, then we would rather not send this to ourselves -
 * instead, we queue it up for local processing
 */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &alert,
 ORTE_RML_TAG_PLM,
 orte_plm_base_receive_process_msg);
@ -1992,7 +1992,7 @@ static void check_proc_complete(orte_odls_child_t *child)
 /* if we are the HNP, then we would rather not send this to ourselves -
 * instead, we queue it up for local processing
 */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &alert,
 ORTE_RML_TAG_PLM,
 orte_plm_base_receive_process_msg);
@ -2142,8 +2142,8 @@ GOTCHILD:
 free(job);
 goto MOVEON;
 }
-abort_file = opal_os_path(false, orte_process_info.tmpdir_base,
-orte_process_info.top_session_dir,
+abort_file = opal_os_path(false, orte_proc_info.tmpdir_base,
+orte_proc_info.top_session_dir,
 job, vpid, "abort", NULL );
 OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
 "%s odls:waitpid_fired checking abort file %s",
@ -2431,7 +2431,7 @@ int orte_odls_base_default_kill_local_procs(orte_jobid_t job, bool set_state,
 if (0 != (err = kill_local(child->pid, SIGTERM))) {
 orte_show_help("help-odls-default.txt",
 "odls-default:could-not-send-kill",
-true, orte_process_info.nodename, child->pid, err);
+true, orte_proc_info.nodename, child->pid, err);
 /* check the proc state - ensure it is in one of the termination
 * states so that we properly wakeup
 */
@ -2457,7 +2457,7 @@ int orte_odls_base_default_kill_local_procs(orte_jobid_t job, bool set_state,
 if (!child_died(child->pid, orte_odls_globals.timeout_before_sigkill, &exit_status)) {
 orte_show_help("help-odls-default.txt",
 "odls-default:could-not-kill",
-true, orte_process_info.nodename, child->pid);
+true, orte_proc_info.nodename, child->pid);
 }
 }
 OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
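Both kill hunks above follow the same escalation: deliver SIGTERM, give the child a bounded grace period, then fall back to SIGKILL. A generic sketch of that pattern; the helper names are stand-ins, not the odls API:

#include <signal.h>
#include <stdbool.h>
#include <sys/types.h>

/* try a polite terminate first, then force the issue if the child lingers */
static void terminate_child(pid_t pid, int timeout_secs,
                            bool (*wait_for_exit)(pid_t, int))
{
    if (0 != kill(pid, SIGTERM)) {
        /* could not deliver SIGTERM - the real code reports via orte_show_help */
    }
    if (!wait_for_exit(pid, timeout_secs)) {
        kill(pid, SIGKILL);   /* child ignored SIGTERM within the timeout */
    }
}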
@ -2486,7 +2486,7 @@ RECORD:
 /* if we are the HNP, then we would rather not send this to ourselves -
 * instead, we queue it up for local processing
 */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &alert,
 ORTE_RML_TAG_PLM,
 orte_plm_base_receive_process_msg);
@ -2538,10 +2538,10 @@ int orte_odls_base_get_proc_stats(opal_buffer_t *answer,

 OBJ_CONSTRUCT(&stats, opal_pstats_t);
 /* record node up to first '.' */
-for (j=0; j < (int)strlen(orte_process_info.nodename) &&
+for (j=0; j < (int)strlen(orte_proc_info.nodename) &&
 j < OPAL_PSTAT_MAX_STRING_LEN-1 &&
-orte_process_info.nodename[j] != '.'; j++) {
-stats.node[j] = orte_process_info.nodename[j];
+orte_proc_info.nodename[j] != '.'; j++) {
+stats.node[j] = orte_proc_info.nodename[j];
 }
 /* record rank */
 stats.rank = child->name->vpid;
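The get_proc_stats hunk copies the nodename into a fixed-size stats field, stopping at the first '.' so only the short hostname is recorded. A small sketch of that truncation, assuming the destination buffer holds at least OPAL_PSTAT_MAX_STRING_LEN bytes; the helper name is illustrative:

#include <string.h>

/* copy the short hostname (up to the first dot) into a bounded field */
static void record_short_nodename(char *dest, const char *nodename)
{
    int j;

    for (j = 0; j < (int)strlen(nodename) &&
                j < OPAL_PSTAT_MAX_STRING_LEN - 1 &&
                nodename[j] != '.'; j++) {
        dest[j] = nodename[j];
    }
    dest[j] = '\0';   /* in the real code the opal_pstats_t constructor presumably zeroes the field */
}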
@ -70,7 +70,7 @@ int orte_odls_base_preload_files_app_context(orte_app_context_t* app_context)

 /* Define the process set */
 p_set = OBJ_NEW(orte_filem_base_process_set_t);
-if( orte_process_info.hnp ) {
+if( orte_proc_info.hnp ) {
 /* if I am the HNP, then use me as the source */
 p_set->source.jobid = ORTE_PROC_MY_NAME->jobid;
 p_set->source.vpid = ORTE_PROC_MY_NAME->vpid;
@ -152,7 +152,7 @@ static int orte_odls_base_preload_append_binary(orte_app_context_t* context,
 f_set = OBJ_NEW(orte_filem_base_file_set_t);

 /* Local Placement */
-asprintf(&local_bin, "%s/%s", orte_process_info.job_session_dir, opal_basename(context->app));
+asprintf(&local_bin, "%s/%s", orte_proc_info.job_session_dir, opal_basename(context->app));
 if(orte_odls_base_is_preload_local_dup(local_bin, filem_request) ) {
 goto cleanup;
 }
@ -222,7 +222,7 @@ static int orte_odls_base_preload_append_files(orte_app_context_t* context,
 }

 /* If this is the HNP, then source = sink, so use the same path for each local and remote */
-if( orte_process_info.hnp ) {
+if( orte_proc_info.hnp ) {
 free(remote_targets[i]);
 remote_targets[i] = strdup(local_ref);
 }
@ -136,7 +136,7 @@ static char *
 false, false, NULL, &user);

 if (0 > asprintf(&frontend, OPAL_PATH_SEP"%s"OPAL_PATH_SEP"openmpi-bproc-%s",
-orte_process_info.tmpdir_base, user)) {
+orte_proc_info.tmpdir_base, user)) {
 ORTE_ERROR_LOG(ORTE_ERROR);
 path = NULL;
 }
@ -524,7 +524,7 @@ int orte_odls_bproc_finalize(void)
 {
 orte_iof.iof_flush();
 odls_bproc_remove_dir();
-orte_session_dir_finalize(orte_process_info.my_name);
+orte_session_dir_finalize(orte_proc_info.my_name);
 return ORTE_SUCCESS;
 }

@ -546,14 +546,14 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t
 port in the range. Otherwise, tcp_port_min will be 0, which
 means "pick any port" */
 if (AF_INET == af_family) {
-if (orte_process_info.daemon) {
+if (orte_proc_info.daemon) {
 /* if static ports were provided, the daemon takes the
 * first entry in the list - otherwise, we "pick any port"
 */
 if (NULL != mca_oob_tcp_component.tcp4_static_ports) {
 port = strtol(mca_oob_tcp_component.tcp4_static_ports[0], NULL, 10);
 /* save the port for later use */
-orte_process_info.my_port = port;
+orte_proc_info.my_port = port;
 /* convert it to network-byte-order */
 port = htons(port);
 /* flag that we are using static ports */
@ -562,7 +562,7 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t
 port = 0;
 orte_static_ports = false;
 }
-} else if (orte_process_info.mpi_proc) {
+} else if (orte_proc_info.mpi_proc) {
 /* if static ports were provided, an mpi proc takes its
 * node_local_rank entry in the list IF it has that info
 * AND enough ports were provided - otherwise, we "pick any port"
@ -575,7 +575,7 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t
 /* any daemon takes the first entry, so we start with the second */
 port = strtol(mca_oob_tcp_component.tcp4_static_ports[nrank+1], NULL, 10);
 /* save the port for later use */
-orte_process_info.my_port = port;
+orte_proc_info.my_port = port;
 /* convert it to network-byte-order */
 port = htons(port);
 /* flag that we are using static ports */
@ -599,14 +599,14 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t

 #if OPAL_WANT_IPV6
 if (AF_INET6 == af_family) {
-if (orte_process_info.daemon) {
+if (orte_proc_info.daemon) {
 /* if static ports were provided, the daemon takes the
 * first entry in the list - otherwise, we "pick any port"
 */
 if (NULL != mca_oob_tcp_component.tcp6_static_ports) {
 port = strtol(mca_oob_tcp_component.tcp6_static_ports[0], NULL, 10);
 /* save the port for later use */
-orte_process_info.my_port = port;
+orte_proc_info.my_port = port;
 /* convert it to network-byte-order */
 port = htons(port);
 /* flag that we are using static ports */
@ -615,7 +615,7 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t
 port = 0;
 orte_static_ports = false;
 }
-} else if (orte_process_info.mpi_proc) {
+} else if (orte_proc_info.mpi_proc) {
 /* if static ports were provided, an mpi proc takes its
 * node_local_rank entry in the list IF it has that info
 * AND enough ports were provided - otherwise, we "pick any port"
@ -628,7 +628,7 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t
 /* any daemon takes the first entry, so we start with the second */
 port = strtol(mca_oob_tcp_component.tcp6_static_ports[nrank+1], NULL, 10);
 /* save the port for later use */
-orte_process_info.my_port = port;
+orte_proc_info.my_port = port;
 /* convert it to network-byte-order */
 port = htons(port);
 /* flag that we are using static ports */
@ -701,7 +701,7 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t
 /* if we dynamically assigned the port, save it here,
 * remembering to convert it back from network byte order first
 */
-orte_process_info.my_port = ntohs(*target_port);
+orte_proc_info.my_port = ntohs(*target_port);
 }

 /* setup listen backlog to maximum allowed by kernel */
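Taken together, the create_listen hunks implement one port policy for both address families: a daemon claims the first static-port entry, an MPI proc claims the entry at its node-local rank plus one, and anything else asks the kernel for an ephemeral port. A condensed sketch of that policy; the helper is illustrative, and unlike the real code it does not verify that enough list entries were provided:

#include <arpa/inet.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

/* decide which TCP port this process should listen on */
static uint16_t choose_listen_port(char **static_ports, int node_local_rank,
                                   bool is_daemon, bool is_mpi_proc)
{
    long port = 0;   /* 0 means "pick any port" */

    if (NULL != static_ports) {
        if (is_daemon) {
            port = strtol(static_ports[0], NULL, 10);          /* daemon owns entry 0 */
        } else if (is_mpi_proc && node_local_rank >= 0) {
            port = strtol(static_ports[node_local_rank + 1], NULL, 10);
        }
    }
    orte_proc_info.my_port = port;        /* remember it in host byte order */
    return htons((uint16_t)port);         /* the listener wants network byte order */
}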
@ -1368,7 +1368,7 @@ int mca_oob_tcp_init(void)
 {
 orte_jobid_t jobid;
 int rc;
-int randval = orte_process_info.num_procs;
+int randval = orte_proc_info.num_procs;

 if (0 == randval) randval = 10;

@ -1387,10 +1387,10 @@ int mca_oob_tcp_init(void)
 jobid = ORTE_PROC_MY_NAME->jobid;

 /* Fix up the listen type. This is the first call into the OOB in
-   which the orte_process_info.hnp field is reliably set. The
+   which the orte_proc_info.hnp field is reliably set. The
    listen_mode should only be listen_thread for the HNP -- all
    others should use the traditional event library. */
-if (!orte_process_info.hnp) {
+if (!orte_proc_info.hnp) {
 mca_oob_tcp_component.tcp_listen_type = OOB_TCP_EVENT;
 }

@ -478,7 +478,7 @@ static void mca_oob_tcp_msg_data(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pee
 * another job family - procs dont' need to do this because
 * they always route through their daemons anyway
 */
-if (!orte_process_info.mpi_proc) {
+if (!orte_proc_info.mpi_proc) {
 if ((ORTE_JOB_FAMILY(msg->msg_hdr.msg_origin.jobid) !=
 ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) &&
 (0 != ORTE_JOB_FAMILY(msg->msg_hdr.msg_origin.jobid))) {
@ -39,7 +39,7 @@ int orte_plm_base_finalize(void)
 orte_plm.finalize();

 /* if we are the HNP, then stop our receive */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 if (ORTE_SUCCESS != (rc = orte_plm_base_comm_stop())) {
 ORTE_ERROR_LOG(rc);
 return rc;
@ -41,9 +41,9 @@ int orte_plm_base_set_hnp_name(void)
 uint32_t bias;

 /* hash the nodename */
-OPAL_HASH_STR(orte_process_info.nodename, hash32);
+OPAL_HASH_STR(orte_proc_info.nodename, hash32);

-bias = (uint32_t)orte_process_info.pid;
+bias = (uint32_t)orte_proc_info.pid;

 OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
 "plm:base:set_hnp_name: initial bias %ld nodename hash %lu",
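set_hnp_name() above hashes the nodename and mixes in the pid as a bias before deriving the HNP's identity. The following is only a rough illustration of that idea, not ORTE's actual hash or jobid construction:

#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>

/* derive a pseudo-random seed from the hostname and pid (illustrative only) */
static uint32_t seed_jobid(const char *nodename)
{
    uint32_t hash32 = 0;
    const char *c;

    for (c = nodename; '\0' != *c; c++) {        /* simple string hash */
        hash32 = (hash32 << 5) + hash32 + (uint32_t)*c;
    }
    srand(hash32 + (uint32_t)getpid());          /* the pid acts as the bias */
    return (uint32_t)rand();
}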
@ -151,7 +151,7 @@ int orte_plm_base_setup_job(orte_job_t *jdata)
 ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
 return ORTE_ERR_NOT_FOUND;
 }
-if (orte_process_info.num_procs != jdatorted->num_procs) {
+if (orte_proc_info.num_procs != jdatorted->num_procs) {
 /* more daemons are being launched - update the routing tree to
 * ensure that the HNP knows how to route messages via
 * the daemon routing tree - this needs to be done
@ -159,7 +159,7 @@ int orte_plm_base_setup_job(orte_job_t *jdata)
 * hasn't unpacked its launch message prior to being
 * asked to communicate.
 */
-orte_process_info.num_procs = jdatorted->num_procs;
+orte_proc_info.num_procs = jdatorted->num_procs;
 if (ORTE_SUCCESS != (rc = orte_routed.update_routing_tree())) {
 ORTE_ERROR_LOG(rc);
 return rc;
@ -1012,11 +1012,11 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
 }

 /* pass the total number of daemons that will be in the system */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
 num_procs = jdata->num_procs;
 } else {
-num_procs = orte_process_info.num_procs;
+num_procs = orte_proc_info.num_procs;
 }
 opal_argv_append(argc, argv, "-mca");
 opal_argv_append(argc, argv, "orte_ess_num_procs");
@ -1025,10 +1025,10 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
 free(param);

 /* pass the uri of the hnp */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 rml_uri = orte_rml.get_contact_info();
 } else {
-rml_uri = orte_process_info.my_hnp_uri;
+rml_uri = orte_proc_info.my_hnp_uri;
 }
 asprintf(&param, "\"%s\"", rml_uri);
 opal_argv_append(argc, argv, "--hnp-uri");
@ -1039,7 +1039,7 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
 * being sure to "purge" any that would cause problems
 * on backend nodes
 */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 cnt = opal_argv_count(orted_cmd_line);
 for (i=0; i < cnt; i+=3) {
 /* if the specified option is more than one word, we don't
@ -278,7 +278,7 @@ int orte_plm_base_orted_kill_local_procs(orte_jobid_t job)
 * fire right away, but that's okay
 * The macro makes a copy of the buffer, so it's okay to release it here
 */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &cmd, ORTE_RML_TAG_DAEMON, orte_daemon_cmd_processor);
 }

@ -312,7 +312,7 @@ void orte_plm_base_receive_process_msg(int fd, short event, void *data)
 OBJ_DESTRUCT(&answer);

 /* see if an error occurred - if so, wakeup the HNP so we can exit */
-if (orte_process_info.hnp && ORTE_SUCCESS != rc) {
+if (orte_proc_info.hnp && ORTE_SUCCESS != rc) {
 orte_trigger_event(&orte_exit);
 }
 }
@ -186,7 +186,7 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
 OBJ_DESTRUCT(&hosts);

 /* is this a local operation? */
-if (0 == strcmp(orte_process_info.nodename, nodename)) {
+if (0 == strcmp(orte_proc_info.nodename, nodename)) {
 local_op = true;
 }

@ -456,7 +456,7 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
 * required to pass existence tests
 */
 param = mca_base_param_environ_variable("orte","hnp","uri");
-asprintf(&path, "\"%s\"", orte_process_info.my_hnp_uri);
+asprintf(&path, "\"%s\"", orte_proc_info.my_hnp_uri);
 opal_setenv(param, path, true, &argv);
 free(param);
 free(path);
@ -53,7 +53,7 @@ int orte_plm_base_select(void)
 * If we didn't find one, and we are a daemon, then default to retaining the proxy.
 * Otherwise, if we didn't find one to select, that is unacceptable.
 */
-if (orte_process_info.daemon) {
+if (orte_proc_info.daemon) {
 /* don't record a selected component or flag selected
 * so we finalize correctly - just leave the plm alone
 * as it defaults to pointing at the proxy
@ -356,12 +356,12 @@ static void orte_plm_bproc_setup_env(char *** env)
 }

 /* ns replica contact info */
-if(NULL == orte_process_info.ns_replica) {
-orte_dss.copy((void**)&orte_process_info.ns_replica, orte_process_info.my_name, ORTE_NAME);
-orte_process_info.ns_replica_uri = orte_rml.get_uri();
+if(NULL == orte_proc_info.ns_replica) {
+orte_dss.copy((void**)&orte_proc_info.ns_replica, orte_proc_info.my_name, ORTE_NAME);
+orte_proc_info.ns_replica_uri = orte_rml.get_uri();
 }
 var = mca_base_param_environ_variable("ns","replica","uri");
-opal_setenv(var,orte_process_info.ns_replica_uri, true, env);
+opal_setenv(var,orte_proc_info.ns_replica_uri, true, env);
 free(var);

 /* make sure the username used to create the bproc directory is the same on
@ -371,12 +371,12 @@ static void orte_plm_bproc_setup_env(char *** env)
 free(var);

 /* gpr replica contact info */
-if(NULL == orte_process_info.gpr_replica) {
-orte_dss.copy((void**)&orte_process_info.gpr_replica, orte_process_info.my_name, ORTE_NAME);
-orte_process_info.gpr_replica_uri = orte_rml.get_uri();
+if(NULL == orte_proc_info.gpr_replica) {
+orte_dss.copy((void**)&orte_proc_info.gpr_replica, orte_proc_info.my_name, ORTE_NAME);
+orte_proc_info.gpr_replica_uri = orte_rml.get_uri();
 }
 var = mca_base_param_environ_variable("gpr","replica","uri");
-opal_setenv(var,orte_process_info.gpr_replica_uri, true, env);
+opal_setenv(var,orte_proc_info.gpr_replica_uri, true, env);
 free(var);

 /* universe directory - needs to match orted */
@ -106,7 +106,7 @@ static int orte_smr_bproc_open(void)

 static orte_smr_base_module_t* orte_smr_bproc_init(int *priority)
 {
-if (!orte_process_info.seed) {
+if (!orte_proc_info.seed) {
 return NULL;
 }

@ -144,7 +144,7 @@ static int orte_plm_ccp_component_query(mca_base_module_t **module, int *priority)
 }

 /* if we are NOT an HNP, then don't select us */
-if (!orte_process_info.hnp) {
+if (!orte_proc_info.hnp) {
 pCluster->Release();
 *module = NULL;
 return ORTE_ERROR;
@ -302,7 +302,7 @@ static void orte_plm_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
 /* if we are not the HNP, send a message to the HNP alerting it
 * to the failure
 */
-if (!orte_process_info.hnp) {
+if (!orte_proc_info.hnp) {
 opal_buffer_t buf;
 orte_vpid_t *vpid=(orte_vpid_t*)cbdata;
 OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
@ -672,7 +672,7 @@ static int setup_launch(int *argcptr, char ***argvptr,
 * by enclosing them in quotes. Check for any multi-word
 * mca params passed to mpirun and include them
 */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 int cnt, i;
 cnt = opal_argv_count(orted_cmd_line);
 for (i=0; i < cnt; i+=3) {
@ -852,7 +852,7 @@ static int remote_spawn(opal_buffer_t *launch)
 OBJ_RELEASE(item);
 }
 /* reconstruct the child list */
-find_children(0, 0, ORTE_PROC_MY_NAME->vpid, orte_process_info.num_procs);
+find_children(0, 0, ORTE_PROC_MY_NAME->vpid, orte_proc_info.num_procs);

 /* if I have no children, just return */
 if (opal_list_is_empty(&mca_plm_rsh_component.children)) {
@ -865,7 +865,7 @@ static int remote_spawn(opal_buffer_t *launch)
 }

 /* setup the launch */
-if (ORTE_SUCCESS != (rc = setup_launch(&argc, &argv, orte_process_info.nodename, &node_name_index1,
+if (ORTE_SUCCESS != (rc = setup_launch(&argc, &argv, orte_proc_info.nodename, &node_name_index1,
 &proc_vpid_index, prefix))) {
 ORTE_ERROR_LOG(rc);
 goto cleanup;
@ -647,7 +647,7 @@ int orte_plm_submit_launch(orte_job_t *jdata)
 * match, check using ifislocal().
 */
 if (!mca_plm_submit_component.force_submit &&
-(0 == strcmp(nodes[nnode]->name, orte_process_info.nodename) ||
+(0 == strcmp(nodes[nnode]->name, orte_proc_info.nodename) ||
 opal_ifislocal(nodes[nnode]->name))) {
 if (mca_plm_submit_component.debug) {
 opal_output(0, "plm:submit: %s is a LOCAL node\n",
@ -86,7 +86,7 @@ static int ras_alps_open(void)
 static int orte_ras_alps_component_query(mca_base_module_t **module, int *priority)
 {
 /* if we are not an HNP, then we must not be selected */
-if (!orte_process_info.hnp) {
+if (!orte_proc_info.hnp) {
 *module = NULL;
 return ORTE_ERROR;
 }
@ -288,10 +288,10 @@ int orte_ras_base_allocate(orte_job_t *jdata)
 OBJ_DESTRUCT(&nodes);
 return ORTE_ERR_OUT_OF_RESOURCE;
 }
-/* use the same name we got in orte_process_info so we avoid confusion in
+/* use the same name we got in orte_proc_info so we avoid confusion in
 * the session directories
 */
-node->name = strdup(orte_process_info.nodename);
+node->name = strdup(orte_proc_info.nodename);
 node->state = ORTE_NODE_STATE_UP;
 node->slots_inuse = 0;
 node->slots_max = 0;
@ -105,7 +105,7 @@ static int orte_ras_ccp_component_query(mca_base_module_t **module, int *priority)
 }

 /* if we are NOT an HNP, then don't select us */
-if (!orte_process_info.hnp) {
+if (!orte_proc_info.hnp) {
 pCluster->Release();
 *module = NULL;
 return ORTE_ERROR;
@ -131,9 +131,9 @@ int orte_rml_base_update_contact_info(opal_buffer_t* data)
 * in our process_info struct so we can correctly route any messages
 */
 if (ORTE_PROC_MY_NAME->jobid == name.jobid &&
-orte_process_info.daemon &&
-orte_process_info.num_procs < num_procs) {
-orte_process_info.num_procs = num_procs;
+orte_proc_info.daemon &&
+orte_proc_info.num_procs < num_procs) {
+orte_proc_info.num_procs = num_procs;
 /* if we changed it, then we better update the routed
 * tree so daemon collectives work correctly
 */
@ -57,7 +57,7 @@ int orte_routed_base_comm_start(void)
 {
 int rc;

-if (recv_issued || !orte_process_info.hnp) {
+if (recv_issued || !orte_proc_info.hnp) {
 return ORTE_SUCCESS;
 }

@ -83,7 +83,7 @@ int orte_routed_base_comm_stop(void)
 {
 int rc;

-if (!recv_issued || !orte_process_info.hnp) {
+if (!recv_issued || !orte_proc_info.hnp) {
 return ORTE_SUCCESS;
 }

@ -37,7 +37,7 @@ static void report_sync(int status, orte_process_name_t* sender,
 orte_rml_tag_t tag, void *cbdata)
 {
 /* just copy the payload to the sync_buf */
-opal_dss.copy_payload(orte_process_info.sync_buf, buffer);
+opal_dss.copy_payload(orte_proc_info.sync_buf, buffer);
 /* flag as complete */
 sync_recvd = true;
 }
@ -112,9 +112,9 @@ static int finalize(void)
 /* if I am an application process, indicate that I am
 * truly finalizing prior to departure
 */
-if (!orte_process_info.hnp &&
-!orte_process_info.daemon &&
-!orte_process_info.tool) {
+if (!orte_proc_info.hnp &&
+!orte_proc_info.daemon &&
+!orte_proc_info.tool) {
 if (ORTE_SUCCESS != (rc = orte_routed_base_register_sync(false))) {
 ORTE_ERROR_LOG(rc);
 return rc;
@ -122,7 +122,7 @@ static int finalize(void)
 }

 /* if I am the HNP, I need to stop the comm recv */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 orte_routed_base_comm_stop();
 }

@ -156,8 +156,8 @@ static int delete_route(orte_process_name_t *proc)
 /* if I am an application process, I don't have any routes
 * so there is nothing for me to do
 */
-if (!orte_process_info.hnp && !orte_process_info.daemon &&
-!orte_process_info.tool) {
+if (!orte_proc_info.hnp && !orte_proc_info.daemon &&
+!orte_proc_info.tool) {
 return ORTE_SUCCESS;
 }

@ -177,7 +177,7 @@ static int delete_route(orte_process_name_t *proc)
 * in my routing table and thus have nothing to do
 * here, just return
 */
-if (orte_process_info.daemon) {
+if (orte_proc_info.daemon) {
 return ORTE_SUCCESS;
 }

@ -224,8 +224,8 @@ static int update_route(orte_process_name_t *target,
 /* if I am an application process, we don't update the route since
 * we automatically route everything through the local daemon
 */
-if (!orte_process_info.hnp && !orte_process_info.daemon &&
-!orte_process_info.tool) {
+if (!orte_proc_info.hnp && !orte_proc_info.daemon &&
+!orte_proc_info.tool) {
 return ORTE_SUCCESS;
 }

@ -252,7 +252,7 @@ static int update_route(orte_process_name_t *target,
 * anything to this job family via my HNP - so nothing to do
 * here, just return
 */
-if (orte_process_info.daemon) {
+if (orte_proc_info.daemon) {
 return ORTE_SUCCESS;
 }

@ -318,8 +318,8 @@ static orte_process_name_t get_route(orte_process_name_t *target)
 }

 /* if I am an application process, always route via my local daemon */
-if (!orte_process_info.hnp && !orte_process_info.daemon &&
-!orte_process_info.tool) {
+if (!orte_proc_info.hnp && !orte_proc_info.daemon &&
+!orte_proc_info.tool) {
 ret = ORTE_PROC_MY_DAEMON;
 goto found;
 }
@ -337,7 +337,7 @@ static orte_process_name_t get_route(orte_process_name_t *target)
 /* IF THIS IS FOR A DIFFERENT JOB FAMILY... */
 if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
 /* if I am a daemon, route this via the HNP */
-if (orte_process_info.daemon) {
+if (orte_proc_info.daemon) {
 ret = ORTE_PROC_MY_HNP;
 goto found;
 }
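The update_route/get_route hunks above encode one routing rule: pure application processes always talk to their local daemon, while a daemon forwards traffic for a foreign job family to the HNP. A compact sketch of that decision; the direct-route fallthrough is a simplification of the hash-table lookup done by the real module:

/* pick the next hop toward target, following the policy in the hunks above */
static orte_process_name_t next_hop(const orte_process_name_t *target)
{
    if (!orte_proc_info.hnp && !orte_proc_info.daemon && !orte_proc_info.tool) {
        return *ORTE_PROC_MY_DAEMON;    /* app proc: always via my local daemon */
    }
    if (orte_proc_info.daemon &&
        ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
        return *ORTE_PROC_MY_HNP;       /* cross-family traffic goes through the HNP */
    }
    return *target;                     /* simplification: assume a direct route */
}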
@ -498,7 +498,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
 int rc;

 /* if I am a tool, then I stand alone - there is nothing to do */
-if (orte_process_info.tool) {
+if (orte_proc_info.tool) {
 return ORTE_SUCCESS;
 }

@ -506,31 +506,31 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
 * from the data sent to me for launch and update the routing tables to
 * point at the daemon for each proc
 */
-if (orte_process_info.daemon) {
+if (orte_proc_info.daemon) {

 OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
 "%s routed_binomial: init routes for daemon job %s\n\thnp_uri %s",
 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 ORTE_JOBID_PRINT(job),
-(NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri));
+(NULL == orte_proc_info.my_hnp_uri) ? "NULL" : orte_proc_info.my_hnp_uri));

 if (NULL == ndat) {
 /* indicates this is being called during orte_init.
 * Get the HNP's name for possible later use
 */
-if (NULL == orte_process_info.my_hnp_uri) {
+if (NULL == orte_proc_info.my_hnp_uri) {
 /* fatal error */
 ORTE_ERROR_LOG(ORTE_ERR_FATAL);
 return ORTE_ERR_FATAL;
 }
 /* set the contact info into the hash table */
-if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_process_info.my_hnp_uri))) {
+if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_proc_info.my_hnp_uri))) {
 ORTE_ERROR_LOG(rc);
 return(rc);
 }

 /* extract the hnp name and store it */
-if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
+if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_hnp_uri,
 ORTE_PROC_MY_HNP, NULL))) {
 ORTE_ERROR_LOG(rc);
 return rc;
@ -561,7 +561,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
 }


-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {

 OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
 "%s routed_binomial: init routes for HNP job %s",
@ -669,10 +669,10 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
 OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
 "%s routed_binomial: init routes for proc job %s\n\thnp_uri %s\n\tdaemon uri %s",
 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job),
-(NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri,
-(NULL == orte_process_info.my_daemon_uri) ? "NULL" : orte_process_info.my_daemon_uri));
+(NULL == orte_proc_info.my_hnp_uri) ? "NULL" : orte_proc_info.my_hnp_uri,
+(NULL == orte_proc_info.my_daemon_uri) ? "NULL" : orte_proc_info.my_daemon_uri));

-if (NULL == orte_process_info.my_daemon_uri) {
+if (NULL == orte_proc_info.my_daemon_uri) {
 /* in this module, we absolutely MUST have this information - if
 * we didn't get it, then error out
 */
@ -691,7 +691,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
 * to it. This is required to ensure that we -do- send messages to the correct
 * HNP name
 */
-if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
+if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_hnp_uri,
 ORTE_PROC_MY_HNP, NULL))) {
 ORTE_ERROR_LOG(rc);
 return rc;
@ -701,12 +701,12 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
 * the connection, but just tells the RML how to reach the daemon
 * if/when we attempt to send to it
 */
-if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_process_info.my_daemon_uri))) {
+if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_proc_info.my_daemon_uri))) {
 ORTE_ERROR_LOG(rc);
 return(rc);
 }
 /* extract the daemon's name so we can update the routing table */
-if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_daemon_uri,
+if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_daemon_uri,
 ORTE_PROC_MY_DAEMON, NULL))) {
 ORTE_ERROR_LOG(rc);
 return rc;
@ -854,7 +854,7 @@ static int update_routing_tree(void)
 /* if I am anything other than a daemon or the HNP, this
 * is a meaningless command as I am not allowed to route
 */
-if (!orte_process_info.daemon && !orte_process_info.hnp) {
+if (!orte_proc_info.daemon && !orte_proc_info.hnp) {
 return ORTE_ERR_NOT_SUPPORTED;
 }

@ -868,7 +868,7 @@ static int update_routing_tree(void)
 * lie underneath their branch
 */
 my_parent.vpid = binomial_tree(0, 0, ORTE_PROC_MY_NAME->vpid,
-orte_process_info.num_procs,
+orte_proc_info.num_procs,
 &num_children, &my_children, NULL);

 if (0 < opal_output_get_verbosity(orte_routed_base_output)) {
@ -878,7 +878,7 @@ static int update_routing_tree(void)
 item = opal_list_get_next(item)) {
 child = (orte_routed_tree_t*)item;
 opal_output(0, "%s: \tchild %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), child->vpid);
-for (j=0; j < (int)orte_process_info.num_procs; j++) {
+for (j=0; j < (int)orte_proc_info.num_procs; j++) {
 if (opal_bitmap_is_set_bit(&child->relatives, j)) {
 opal_output(0, "%s: \t\trelation %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j);
 }
@ -897,7 +897,7 @@ static orte_vpid_t get_routing_tree(opal_list_t *children)
|
|||||||
/* if I am anything other than a daemon or the HNP, this
|
/* if I am anything other than a daemon or the HNP, this
|
||||||
* is a meaningless command as I am not allowed to route
|
* is a meaningless command as I am not allowed to route
|
||||||
*/
|
*/
|
||||||
if (!orte_process_info.daemon && !orte_process_info.hnp) {
|
if (!orte_proc_info.daemon && !orte_proc_info.hnp) {
|
||||||
return ORTE_VPID_INVALID;
|
return ORTE_VPID_INVALID;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -928,7 +928,7 @@ static int get_wireup_info(opal_buffer_t *buf)
|
|||||||
* is a meaningless command as I cannot get
|
* is a meaningless command as I cannot get
|
||||||
* the requested info
|
* the requested info
|
||||||
*/
|
*/
|
||||||
if (!orte_process_info.hnp) {
|
if (!orte_proc_info.hnp) {
|
||||||
return ORTE_ERR_NOT_SUPPORTED;
|
return ORTE_ERR_NOT_SUPPORTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
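The binomial module above derives each daemon's parent and children from a binomial spanning tree over vpids 0..num_procs-1, rooted at the HNP. As a rough illustration of that shape only (not the actual binomial_tree() helper, which also fills the relatives bitmaps), a standalone sketch:

/*
 * Illustrative sketch of a binomial spanning tree over vpids 0..num_procs-1,
 * rooted at the HNP (vpid 0).  Generic construction for illustration; the
 * real binomial_tree() helper also records each child's relatives.
 */
#include <stdio.h>

/* parent of r is r with its highest set bit cleared; the root reports -1 */
static int binomial_parent(int r)
{
    int hb = 1;
    if (r <= 0) {
        return -1;
    }
    while ((hb << 1) <= r) {
        hb <<= 1;
    }
    return r - hb;
}

/* children of r are r + k for each power of two k strictly above r,
 * as long as the child vpid still exists */
static void binomial_children(int r, int num_procs)
{
    int k = 1;
    while (k <= r) {
        k <<= 1;
    }
    for (; r + k < num_procs; k <<= 1) {
        printf("  child %d\n", r + k);
    }
}

int main(void)
{
    int num_procs = 8, v;
    for (v = 0; v < num_procs; v++) {
        printf("vpid %d: parent %d\n", v, binomial_parent(v));
        binomial_children(v, num_procs);
    }
    return 0;
}

For num_procs = 8 this yields children 0 -> {1,2,4}, 1 -> {3,5}, 2 -> {6}, 3 -> {7}, the usual binomial fan-out.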
@@ -103,9 +103,9 @@ static int finalize(void)
 /* if I am an application process, indicate that I am
 * truly finalizing prior to departure
 */
-if (!orte_process_info.hnp &&
-!orte_process_info.daemon &&
-!orte_process_info.tool) {
+if (!orte_proc_info.hnp &&
+!orte_proc_info.daemon &&
+!orte_proc_info.tool) {
 if (ORTE_SUCCESS != (rc = orte_routed_base_register_sync(false))) {
 ORTE_ERROR_LOG(rc);
 return rc;
@@ -113,7 +113,7 @@ static int finalize(void)
 }

 /* if I am the HNP, I need to stop the comm recv */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 orte_routed_base_comm_stop();
 }

@@ -140,8 +140,8 @@ static int delete_route(orte_process_name_t *proc)
 /* if I am an application process, I don't have any routes
 * so there is nothing for me to do
 */
-if (!orte_process_info.hnp && !orte_process_info.daemon &&
-!orte_process_info.tool) {
+if (!orte_proc_info.hnp && !orte_proc_info.daemon &&
+!orte_proc_info.tool) {
 return ORTE_SUCCESS;
 }

@@ -161,7 +161,7 @@ static int delete_route(orte_process_name_t *proc)
 * in my routing table and thus have nothing to do
 * here, just return
 */
-if (orte_process_info.daemon) {
+if (orte_proc_info.daemon) {
 return ORTE_SUCCESS;
 }

@@ -208,8 +208,8 @@ static int update_route(orte_process_name_t *target,
 /* if I am an application process, we don't update the route since
 * we automatically route everything through the local daemon
 */
-if (!orte_process_info.hnp && !orte_process_info.daemon &&
-!orte_process_info.tool) {
+if (!orte_proc_info.hnp && !orte_proc_info.daemon &&
+!orte_proc_info.tool) {
 return ORTE_SUCCESS;
 }

@@ -236,7 +236,7 @@ static int update_route(orte_process_name_t *target,
 * anything to this job family via my HNP - so nothing to do
 * here, just return
 */
-if (orte_process_info.daemon) {
+if (orte_proc_info.daemon) {
 return ORTE_SUCCESS;
 }

@@ -296,8 +296,8 @@ static orte_process_name_t get_route(orte_process_name_t *target)
 }

 /* if I am an application process, always route via my local daemon */
-if (!orte_process_info.hnp && !orte_process_info.daemon &&
-!orte_process_info.tool) {
+if (!orte_proc_info.hnp && !orte_proc_info.daemon &&
+!orte_proc_info.tool) {
 ret = ORTE_PROC_MY_DAEMON;
 goto found;
 }
@@ -315,7 +315,7 @@ static orte_process_name_t get_route(orte_process_name_t *target)
 /* IF THIS IS FOR A DIFFERENT JOB FAMILY... */
 if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
 /* if I am a daemon, route this via the HNP */
-if (orte_process_info.daemon) {
+if (orte_proc_info.daemon) {
 ret = ORTE_PROC_MY_HNP;
 goto found;
 }
@@ -368,7 +368,7 @@ static orte_process_name_t get_route(orte_process_name_t *target)
 daemon.vpid = ORTE_PROC_MY_NAME->vpid - 1;
 ret = &daemon;
 } else {
-if (ORTE_PROC_MY_NAME->vpid < orte_process_info.num_procs-1) {
+if (ORTE_PROC_MY_NAME->vpid < orte_proc_info.num_procs-1) {
 daemon.vpid = ORTE_PROC_MY_NAME->vpid + 1;
 } else {
 /* we are at end of chain - wrap around */
@@ -493,7 +493,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
 int rc;

 /* if I am a tool, then I stand alone - there is nothing to do */
-if (orte_process_info.tool) {
+if (orte_proc_info.tool) {
 return ORTE_SUCCESS;
 }

@@ -501,31 +501,31 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
 * from the data sent to me for launch and update the routing tables to
 * point at the daemon for each proc
 */
-if (orte_process_info.daemon) {
+if (orte_proc_info.daemon) {

 OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
 "%s routed_linear: init routes for daemon job %s\n\thnp_uri %s",
 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 ORTE_JOBID_PRINT(job),
-(NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri));
+(NULL == orte_proc_info.my_hnp_uri) ? "NULL" : orte_proc_info.my_hnp_uri));

 if (NULL == ndat) {
 /* indicates this is being called during orte_init.
 * Get the HNP's name for possible later use
 */
-if (NULL == orte_process_info.my_hnp_uri) {
+if (NULL == orte_proc_info.my_hnp_uri) {
 /* fatal error */
 ORTE_ERROR_LOG(ORTE_ERR_FATAL);
 return ORTE_ERR_FATAL;
 }
 /* set the contact info into the hash table */
-if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_process_info.my_hnp_uri))) {
+if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_proc_info.my_hnp_uri))) {
 ORTE_ERROR_LOG(rc);
 return(rc);
 }

 /* extract the hnp name and store it */
-if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
+if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_hnp_uri,
 ORTE_PROC_MY_HNP, NULL))) {
 ORTE_ERROR_LOG(rc);
 return rc;
@@ -556,7 +556,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
 }


-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {

 OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
 "%s routed_linear: init routes for HNP job %s",
@@ -664,10 +664,10 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
 OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
 "%s routed_linear: init routes for proc job %s\n\thnp_uri %s\n\tdaemon uri %s",
 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job),
-(NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri,
-(NULL == orte_process_info.my_daemon_uri) ? "NULL" : orte_process_info.my_daemon_uri));
+(NULL == orte_proc_info.my_hnp_uri) ? "NULL" : orte_proc_info.my_hnp_uri,
+(NULL == orte_proc_info.my_daemon_uri) ? "NULL" : orte_proc_info.my_daemon_uri));

-if (NULL == orte_process_info.my_daemon_uri) {
+if (NULL == orte_proc_info.my_daemon_uri) {
 /* in this module, we absolutely MUST have this information - if
 * we didn't get it, then error out
 */
@@ -686,7 +686,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
 * to it. This is required to ensure that we -do- send messages to the correct
 * HNP name
 */
-if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
+if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_hnp_uri,
 ORTE_PROC_MY_HNP, NULL))) {
 ORTE_ERROR_LOG(rc);
 return rc;
@@ -696,12 +696,12 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
 * the connection, but just tells the RML how to reach the daemon
 * if/when we attempt to send to it
 */
-if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_process_info.my_daemon_uri))) {
+if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_proc_info.my_daemon_uri))) {
 ORTE_ERROR_LOG(rc);
 return(rc);
 }
 /* extract the daemon's name so we can update the routing table */
-if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_daemon_uri,
+if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_daemon_uri,
 ORTE_PROC_MY_DAEMON, NULL))) {
 ORTE_ERROR_LOG(rc);
 return rc;
@@ -784,7 +784,7 @@ static int update_routing_tree(void)
 /* if I am anything other than a daemon or the HNP, this
 * is a meaningless command as I am not allowed to route
 */
-if (!orte_process_info.daemon && !orte_process_info.hnp) {
+if (!orte_proc_info.daemon && !orte_proc_info.hnp) {
 return ORTE_ERR_NOT_SUPPORTED;
 }

@@ -800,28 +800,28 @@ static orte_vpid_t get_routing_tree(opal_list_t *children)
 /* if I am anything other than a daemon or the HNP, this
 * is a meaningless command as I am not allowed to route
 */
-if (!orte_process_info.daemon && !orte_process_info.hnp) {
+if (!orte_proc_info.daemon && !orte_proc_info.hnp) {
 return ORTE_VPID_INVALID;
 }

 /* the linear routing tree consists of a chain of daemons
-* extending from the HNP to orte_process_info.num_procs-1.
+* extending from the HNP to orte_proc_info.num_procs-1.
 * Accordingly, my child is just the my_vpid+1 daemon
 */
 if (NULL != children &&
-ORTE_PROC_MY_NAME->vpid < orte_process_info.num_procs-1) {
+ORTE_PROC_MY_NAME->vpid < orte_proc_info.num_procs-1) {
 /* my child is just the vpid+1 daemon */
 nm = OBJ_NEW(orte_routed_tree_t);
-opal_bitmap_init(&nm->relatives, orte_process_info.num_procs);
+opal_bitmap_init(&nm->relatives, orte_proc_info.num_procs);
 nm->vpid = ORTE_PROC_MY_NAME->vpid + 1;
 /* my relatives are everyone above that point */
-for (v=nm->vpid+1; v < orte_process_info.num_procs; v++) {
+for (v=nm->vpid+1; v < orte_proc_info.num_procs; v++) {
 opal_bitmap_set_bit(&nm->relatives, v);
 }
 opal_list_append(children, &nm->super);
 }

-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 /* the parent of the HNP is invalid */
 return ORTE_VPID_INVALID;
 }
@@ -839,7 +839,7 @@ static int get_wireup_info(opal_buffer_t *buf)
 * is a meaningless command as I cannot get
 * the requested info
 */
-if (!orte_process_info.hnp) {
+if (!orte_proc_info.hnp) {
 return ORTE_ERR_NOT_SUPPORTED;
 }

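The comment in get_routing_tree() above describes the linear layout: the daemons form a chain from the HNP through vpid num_procs-1, each with a single child at vpid+1 whose relatives are all higher vpids. A minimal standalone sketch of that rule (illustrative only; the real code records the relatives in an opal_bitmap_t hanging off an orte_routed_tree_t):

/* Sketch of the linear chain: every daemon has at most one child (vpid+1),
 * and that child's "relatives" are all higher vpids.  Hypothetical
 * standalone code, not the orte_routed machinery itself. */
#include <stdio.h>

static void linear_children(unsigned my_vpid, unsigned num_procs)
{
    if (my_vpid < num_procs - 1) {
        unsigned child = my_vpid + 1, v;
        printf("vpid %u -> child %u, relatives:", my_vpid, child);
        for (v = child + 1; v < num_procs; v++) {
            printf(" %u", v);   /* these would be set in the child's bitmap */
        }
        printf("\n");
    } else {
        printf("vpid %u is the end of the chain\n", my_vpid);
    }
}

int main(void)
{
    unsigned v, num_procs = 5;
    for (v = 0; v < num_procs; v++) {
        linear_children(v, num_procs);
    }
    return 0;
}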
@@ -113,9 +113,9 @@ static int finalize(void)
 /* if I am an application process, indicate that I am
 * truly finalizing prior to departure
 */
-if (!orte_process_info.hnp &&
-!orte_process_info.daemon &&
-!orte_process_info.tool) {
+if (!orte_proc_info.hnp &&
+!orte_proc_info.daemon &&
+!orte_proc_info.tool) {
 if (ORTE_SUCCESS != (rc = orte_routed_base_register_sync(false))) {
 ORTE_ERROR_LOG(rc);
 return rc;
@@ -123,7 +123,7 @@ static int finalize(void)
 }

 /* if I am the HNP, I need to stop the comm recv */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 orte_routed_base_comm_stop();
 }

@@ -157,8 +157,8 @@ static int delete_route(orte_process_name_t *proc)
 /* if I am an application process, I don't have any routes
 * so there is nothing for me to do
 */
-if (!orte_process_info.hnp && !orte_process_info.daemon &&
-!orte_process_info.tool) {
+if (!orte_proc_info.hnp && !orte_proc_info.daemon &&
+!orte_proc_info.tool) {
 return ORTE_SUCCESS;
 }

@@ -178,7 +178,7 @@ static int delete_route(orte_process_name_t *proc)
 * in my routing table and thus have nothing to do
 * here, just return
 */
-if (orte_process_info.daemon) {
+if (orte_proc_info.daemon) {
 return ORTE_SUCCESS;
 }

@@ -225,8 +225,8 @@ static int update_route(orte_process_name_t *target,
 /* if I am an application process, we don't update the route since
 * we automatically route everything through the local daemon
 */
-if (!orte_process_info.hnp && !orte_process_info.daemon &&
-!orte_process_info.tool) {
+if (!orte_proc_info.hnp && !orte_proc_info.daemon &&
+!orte_proc_info.tool) {
 return ORTE_SUCCESS;
 }

@@ -253,7 +253,7 @@ static int update_route(orte_process_name_t *target,
 * anything to this job family via my HNP - so nothing to do
 * here, just return
 */
-if (orte_process_info.daemon) {
+if (orte_proc_info.daemon) {
 return ORTE_SUCCESS;
 }

@@ -321,8 +321,8 @@ static orte_process_name_t get_route(orte_process_name_t *target)
 }

 /* if I am an application process, always route via my local daemon */
-if (!orte_process_info.hnp && !orte_process_info.daemon &&
-!orte_process_info.tool) {
+if (!orte_proc_info.hnp && !orte_proc_info.daemon &&
+!orte_proc_info.tool) {
 ret = ORTE_PROC_MY_DAEMON;
 goto found;
 }
@@ -340,7 +340,7 @@ static orte_process_name_t get_route(orte_process_name_t *target)
 /* IF THIS IS FOR A DIFFERENT JOB FAMILY... */
 if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
 /* if I am a daemon, route this via the HNP */
-if (orte_process_info.daemon) {
+if (orte_proc_info.daemon) {
 ret = ORTE_PROC_MY_HNP;
 goto found;
 }
@@ -525,7 +525,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
 int rc;

 /* if I am a tool, then I stand alone - there is nothing to do */
-if (orte_process_info.tool) {
+if (orte_proc_info.tool) {
 return ORTE_SUCCESS;
 }

@@ -533,31 +533,31 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
 * from the data sent to me for launch and update the routing tables to
 * point at the daemon for each proc
 */
-if (orte_process_info.daemon) {
+if (orte_proc_info.daemon) {

 OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
 "%s routed_radix: init routes for daemon job %s\n\thnp_uri %s",
 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 ORTE_JOBID_PRINT(job),
-(NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri));
+(NULL == orte_proc_info.my_hnp_uri) ? "NULL" : orte_proc_info.my_hnp_uri));

 if (NULL == ndat) {
 /* indicates this is being called during orte_init.
 * Get the HNP's name for possible later use
 */
-if (NULL == orte_process_info.my_hnp_uri) {
+if (NULL == orte_proc_info.my_hnp_uri) {
 /* fatal error */
 ORTE_ERROR_LOG(ORTE_ERR_FATAL);
 return ORTE_ERR_FATAL;
 }
 /* set the contact info into the hash table */
-if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_process_info.my_hnp_uri))) {
+if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_proc_info.my_hnp_uri))) {
 ORTE_ERROR_LOG(rc);
 return(rc);
 }

 /* extract the hnp name and store it */
-if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
+if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_hnp_uri,
 ORTE_PROC_MY_HNP, NULL))) {
 ORTE_ERROR_LOG(rc);
 return rc;
@@ -588,7 +588,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
 }


-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {

 OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
 "%s routed_radix: init routes for HNP job %s",
@@ -696,10 +696,10 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
 OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
 "%s routed_radix: init routes for proc job %s\n\thnp_uri %s\n\tdaemon uri %s",
 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job),
-(NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri,
-(NULL == orte_process_info.my_daemon_uri) ? "NULL" : orte_process_info.my_daemon_uri));
+(NULL == orte_proc_info.my_hnp_uri) ? "NULL" : orte_proc_info.my_hnp_uri,
+(NULL == orte_proc_info.my_daemon_uri) ? "NULL" : orte_proc_info.my_daemon_uri));

-if (NULL == orte_process_info.my_daemon_uri) {
+if (NULL == orte_proc_info.my_daemon_uri) {
 /* in this module, we absolutely MUST have this information - if
 * we didn't get it, then error out
 */
@@ -718,7 +718,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
 * to it. This is required to ensure that we -do- send messages to the correct
 * HNP name
 */
-if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
+if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_hnp_uri,
 ORTE_PROC_MY_HNP, NULL))) {
 ORTE_ERROR_LOG(rc);
 return rc;
@@ -728,12 +728,12 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
 * the connection, but just tells the RML how to reach the daemon
 * if/when we attempt to send to it
 */
-if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_process_info.my_daemon_uri))) {
+if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_proc_info.my_daemon_uri))) {
 ORTE_ERROR_LOG(rc);
 return(rc);
 }
 /* extract the daemon's name so we can update the routing table */
-if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_daemon_uri,
+if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_daemon_uri,
 ORTE_PROC_MY_DAEMON, NULL))) {
 ORTE_ERROR_LOG(rc);
 return rc;
@@ -828,7 +828,7 @@ static void radix_tree(int rank, int *num_children,
 /* our children start at our rank + num_in_level */
 peer = rank + NInLevel;
 for (i = 0; i < mca_routed_radix_component.radix; i++) {
-if (peer < (int)orte_process_info.num_procs) {
+if (peer < (int)orte_proc_info.num_procs) {
 child = OBJ_NEW(orte_routed_tree_t);
 child->vpid = peer;
 if (NULL != children) {
@@ -836,7 +836,7 @@ static void radix_tree(int rank, int *num_children,
 opal_list_append(children, &child->super);
 (*num_children)++;
 /* setup the relatives bitmap */
-opal_bitmap_init(&child->relatives, orte_process_info.num_procs);
+opal_bitmap_init(&child->relatives, orte_proc_info.num_procs);
 /* point to the relatives */
 relations = &child->relatives;
 } else {
@@ -865,7 +865,7 @@ static int update_routing_tree(void)
 /* if I am anything other than a daemon or the HNP, this
 * is a meaningless command as I am not allowed to route
 */
-if (!orte_process_info.daemon && !orte_process_info.hnp) {
+if (!orte_proc_info.daemon && !orte_proc_info.hnp) {
 return ORTE_ERR_NOT_SUPPORTED;
 }

@@ -909,7 +909,7 @@ static int update_routing_tree(void)
 item = opal_list_get_next(item)) {
 child = (orte_routed_tree_t*)item;
 opal_output(0, "%s: \tchild %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), child->vpid);
-for (j=0; j < (int)orte_process_info.num_procs; j++) {
+for (j=0; j < (int)orte_proc_info.num_procs; j++) {
 if (opal_bitmap_is_set_bit(&child->relatives, j)) {
 opal_output(0, "%s: \t\trelation %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j);
 }
@@ -928,7 +928,7 @@ static orte_vpid_t get_routing_tree(opal_list_t *children)
 /* if I am anything other than a daemon or the HNP, this
 * is a meaningless command as I am not allowed to route
 */
-if (!orte_process_info.daemon && !orte_process_info.hnp) {
+if (!orte_proc_info.daemon && !orte_proc_info.hnp) {
 return ORTE_VPID_INVALID;
 }

@@ -958,7 +958,7 @@ static int get_wireup_info(opal_buffer_t *buf)
 * is a meaningless command as I cannot get
 * the requested info
 */
-if (!orte_process_info.hnp) {
+if (!orte_proc_info.hnp) {
 return ORTE_ERR_NOT_SUPPORTED;
 }

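radix_tree() above fans each daemon out to at most mca_routed_radix_component.radix children, walking level sizes to find where a rank's children begin. The sketch below uses a simpler heap-style numbering (children of r are r*k+1 .. r*k+k), so the vpids differ from ORTE's level-based numbering; it only illustrates the bounded fan-out and the num_procs cut-off:

/* Minimal k-ary ("radix") tree sketch with flat heap-style numbering.
 * Illustration only -- the real radix_tree() numbers ranks by level. */
#include <stdio.h>

static void kary_children(int rank, int radix, int num_procs)
{
    int i;
    printf("rank %d:", rank);
    for (i = 1; i <= radix; i++) {
        int peer = rank * radix + i;    /* candidate child vpid */
        if (peer < num_procs) {         /* same cut-off as the diff above */
            printf(" %d", peer);
        }
    }
    printf("\n");
}

int main(void)
{
    int r, radix = 3, num_procs = 10;
    for (r = 0; r < num_procs; r++) {
        kary_children(r, radix, num_procs);
    }
    return 0;
}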
@@ -176,10 +176,10 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
 OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
 "%s routed_slave: init routes for proc job %s\n\thnp_uri %s\n\tdaemon uri %s",
 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job),
-(NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri,
-(NULL == orte_process_info.my_daemon_uri) ? "NULL" : orte_process_info.my_daemon_uri));
+(NULL == orte_proc_info.my_hnp_uri) ? "NULL" : orte_proc_info.my_hnp_uri,
+(NULL == orte_proc_info.my_daemon_uri) ? "NULL" : orte_proc_info.my_daemon_uri));

-if (NULL == orte_process_info.my_daemon_uri) {
+if (NULL == orte_proc_info.my_daemon_uri) {
 /* in this module, we absolutely MUST have this information - if
 * we didn't get it, then error out
 */
@@ -198,7 +198,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
 * to it. This is required to ensure that we -do- send messages to the correct
 * HNP name
 */
-if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
+if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_hnp_uri,
 ORTE_PROC_MY_HNP, NULL))) {
 ORTE_ERROR_LOG(rc);
 return rc;
@@ -208,12 +208,12 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
 * the connection, but just tells the RML how to reach the daemon
 * if/when we attempt to send to it
 */
-if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_process_info.my_daemon_uri))) {
+if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_proc_info.my_daemon_uri))) {
 ORTE_ERROR_LOG(rc);
 return(rc);
 }
 /* extract the daemon's name so we can update the routing table */
-if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_daemon_uri,
+if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_daemon_uri,
 ORTE_PROC_MY_DAEMON, NULL))) {
 ORTE_ERROR_LOG(rc);
 return rc;
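All three routed modules shown here bootstrap the same way: check that the daemon URI was provided, hand it to the RML so the transport knows how to reach the daemon, then parse the same URI to recover the daemon's process name for the routing table. A standalone sketch of that sequence follows; the URI format and both helpers are stand-ins, not the real orte_rml API:

#include <stdio.h>

typedef struct { unsigned jobid, vpid; } name_t;  /* stand-in for orte_process_name_t */

/* stand-ins for orte_rml.set_contact_info() / orte_rml_base_parse_uris() */
static int set_contact_info(const char *uri) { return (NULL != uri) ? 0 : -1; }
static int parse_uri(const char *uri, name_t *name)
{
    /* toy URI format assumed here: "jobid.vpid;address" */
    return (2 == sscanf(uri, "%u.%u", &name->jobid, &name->vpid)) ? 0 : -1;
}

static int init_daemon_route(const char *daemon_uri, name_t *daemon)
{
    if (NULL == daemon_uri) {
        return -1;                        /* fatal: this module needs the URI */
    }
    if (0 != set_contact_info(daemon_uri)) {  /* tell the RML how to reach it */
        return -1;
    }
    return parse_uri(daemon_uri, daemon);     /* recover the name for routing */
}

int main(void)
{
    name_t d;
    if (0 == init_daemon_route("1234.1;tcp://10.0.0.1:5000", &d)) {
        printf("daemon name %u.%u\n", d.jobid, d.vpid);
    }
    return 0;
}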
@@ -298,7 +298,7 @@ static int snapc_full_global_start_listener(void)
 int exit_status = ORTE_SUCCESS;
 int rc;

-if (snapc_recv_issued && orte_process_info.hnp) {
+if (snapc_recv_issued && orte_proc_info.hnp) {
 return ORTE_SUCCESS;
 }

@@ -329,7 +329,7 @@ static int snapc_full_global_stop_listener(void)
 int exit_status = ORTE_SUCCESS;
 int rc;

-if (!snapc_recv_issued && orte_process_info.hnp) {
+if (!snapc_recv_issued && orte_proc_info.hnp) {
 return ORTE_SUCCESS;
 }

@@ -354,7 +354,7 @@ static int snapc_full_global_start_cmdline_listener(void)
 int exit_status = ORTE_SUCCESS;
 int rc;

-if (snapc_cmdline_recv_issued && orte_process_info.hnp) {
+if (snapc_cmdline_recv_issued && orte_proc_info.hnp) {
 return ORTE_SUCCESS;
 }

@@ -385,7 +385,7 @@ static int snapc_full_global_stop_cmdline_listener(void)
 int exit_status = ORTE_SUCCESS;
 int rc;

-if (!snapc_cmdline_recv_issued && orte_process_info.hnp) {
+if (!snapc_cmdline_recv_issued && orte_proc_info.hnp) {
 return ORTE_SUCCESS;
 }

@@ -194,7 +194,7 @@ void orte_daemon_cmd_processor(int fd, short event, void *data)
 orte_daemon_cmd_flag_t command;

 /* check to see if we are in a progress recursion */
-if (orte_process_info.daemon && 1 < (ret = opal_progress_recursion_depth())) {
+if (orte_proc_info.daemon && 1 < (ret = opal_progress_recursion_depth())) {
 /* if we are in a recursion, we want to repost the message event
 * so the progress engine can work its way back up to the top
 * of the stack. Given that this could happen multiple times,
@@ -234,7 +234,7 @@ void orte_daemon_cmd_processor(int fd, short event, void *data)
 wait_time = 1;
 num_recursions = 0;

-if (orte_timing && orte_process_info.hnp) {
+if (orte_timing && orte_proc_info.hnp) {
 /* if we are doing timing, and we are the HNP, then the message doesn't come
 * through the RML recv, so we have to pickup the recv time here
 */
@@ -526,7 +526,7 @@ static int process_commands(orte_process_name_t* sender,
 goto CLEANUP;
 }
 /* initialize the routes to my peers - this will update the number
-* of daemons in the system (i.e., orte_process_info.num_procs) as
+* of daemons in the system (i.e., orte_proc_info.num_procs) as
 * this might have changed
 */
 if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, relay_msg))) {
@@ -605,7 +605,7 @@ static int process_commands(orte_process_name_t* sender,
 /* if we are the HNP, kill our local procs and
 * flag we are exited - but don't yet exit
 */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 orte_job_t *daemons;
 orte_proc_t **procs;
 /* if we are the HNP, ensure our local procs are terminated */
@@ -663,7 +663,7 @@ static int process_commands(orte_process_name_t* sender,
 /* if we are the HNP, kill our local procs and
 * flag we are exited - but don't yet exit
 */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 orte_job_t *daemons;
 orte_proc_t **procs;
 /* if we are the HNP, ensure our local procs are terminated */
@@ -709,7 +709,7 @@ static int process_commands(orte_process_name_t* sender,
 answer = OBJ_NEW(opal_buffer_t);
 job = ORTE_JOBID_INVALID;
 /* can only process this if we are the HNP */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 /* unpack the job data */
 n = 1;
 if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &jdata, &n, ORTE_JOB))) {
@@ -778,7 +778,7 @@ static int process_commands(orte_process_name_t* sender,
 /* if we are not the HNP, we can do nothing - report
 * back 0 procs so the tool won't hang
 */
-if (!orte_process_info.hnp) {
+if (!orte_proc_info.hnp) {
 orte_std_cntr_t zero=0;

 answer = OBJ_NEW(opal_buffer_t);
@@ -861,7 +861,7 @@ static int process_commands(orte_process_name_t* sender,
 /* if we are not the HNP, we can do nothing - report
 * back 0 nodes so the tool won't hang
 */
-if (!orte_process_info.hnp) {
+if (!orte_proc_info.hnp) {
 orte_std_cntr_t zero=0;

 answer = OBJ_NEW(opal_buffer_t);
@@ -942,7 +942,7 @@ static int process_commands(orte_process_name_t* sender,
 /* if we are not the HNP, we can do nothing - report
 * back 0 procs so the tool won't hang
 */
-if (!orte_process_info.hnp) {
+if (!orte_proc_info.hnp) {
 orte_std_cntr_t zero=0;

 answer = OBJ_NEW(opal_buffer_t);
@@ -1077,7 +1077,7 @@ SEND_ANSWER:
 * the requestor. We need to convert that to our own job family
 */
 proc.jobid = ORTE_CONSTRUCT_LOCAL_JOBID(ORTE_PROC_MY_NAME->jobid, proc.jobid);
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 return_addr = sender;
 /* if the request is for a wildcard vpid, then it goes to every
 * daemon. For scalability, we should probably xcast this some
@@ -1086,7 +1086,7 @@ SEND_ANSWER:
 if (ORTE_VPID_WILDCARD == proc.vpid) {
 /* loop across all daemons */
 proc2.jobid = ORTE_PROC_MY_NAME->jobid;
-for (proc2.vpid=1; proc2.vpid < orte_process_info.num_procs; proc2.vpid++) {
+for (proc2.vpid=1; proc2.vpid < orte_proc_info.num_procs; proc2.vpid++) {
 /* setup the cmd */
 relay_msg = OBJ_NEW(opal_buffer_t);
 command = ORTE_DAEMON_TOP_CMD;
@@ -1200,7 +1200,7 @@ SEND_ANSWER:
 /* send the answer back to requester - callback
 * function will release buffer
 */
-if (orte_process_info.hnp) {
+if (orte_proc_info.hnp) {
 /* if I am the HNP, I need to also provide the number of
 * replies the caller should recv and the sample time
 */
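In the SEND_ANSWER hunks, a request carrying ORTE_VPID_WILDCARD makes the HNP relay one command buffer to every daemon vpid from 1 through num_procs-1. A toy sketch of that fan-out loop; the constant and the send helper are illustrative, not the real ORTE calls:

#include <stdio.h>

#define VPID_WILDCARD  ((unsigned) -1)   /* stand-in for ORTE_VPID_WILDCARD */

static void relay_to_daemon(unsigned vpid)
{
    /* stand-in for packing a command into a buffer and sending it
     * to the daemon named by this vpid */
    printf("relaying command to daemon vpid %u\n", vpid);
}

static void relay(unsigned target_vpid, unsigned num_procs)
{
    if (VPID_WILDCARD == target_vpid) {
        unsigned v;
        for (v = 1; v < num_procs; v++) {   /* vpid 0 is the HNP itself */
            relay_to_daemon(v);
        }
    } else {
        relay_to_daemon(target_vpid);
    }
}

int main(void)
{
    relay(VPID_WILDCARD, 4);   /* fan out to daemons 1..3 */
    relay(2, 4);               /* single-target case */
    return 0;
}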
Some files were not shown because too many files changed in this commit.