- First of two or three patches, in orte/util/proc_info.h:
  Adapt orte_process_info to orte_proc_info, and change orte_proc_info() to orte_proc_info_init().
- Compiled on linux-x86-64
- Discussed with Ralph

This commit was SVN r20739.

This commit is contained in:
parent 39796e2a56
commit 781caee0b6
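In practice the change is mechanical: every reference to the global struct orte_process_info becomes orte_proc_info, and the old one-time setup call orte_proc_info() becomes orte_proc_info_init(). A minimal before/after sketch, distilled from the bootstrap_init() hunk below (all identifiers come from that hunk; nothing here is new API):

    /* before: the init routine owned the name orte_proc_info(),
     * while the struct was named orte_process_info */
    orte_proc_info();
    fullpath = opal_os_path( false, orte_process_info.job_session_dir,
                             mca_coll_sm_component.sm_bootstrap_filename, NULL );

    /* after: orte_proc_info now names the struct,
     * and the init routine becomes orte_proc_info_init() */
    orte_proc_info_init();
    fullpath = opal_os_path( false, orte_proc_info.job_session_dir,
                             mca_coll_sm_component.sm_bootstrap_filename, NULL );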
@@ -175,8 +175,8 @@ int ompi_attr_create_predefined(void)
     }
 
     /* check the app_num - if it was set, then define it - otherwise, don't */
-    if (orte_process_info.app_num >= 0) {
-        ret = set_f(MPI_APPNUM, orte_process_info.app_num);
+    if (orte_proc_info.app_num >= 0) {
+        ret = set_f(MPI_APPNUM, orte_proc_info.app_num);
     }
 
     return ret;
@@ -177,8 +177,8 @@ static void backend_fatal_aggregate(char *type,
     arg = va_arg(arglist, char*);
     va_end(arglist);
 
-    asprintf(&prefix, "[%s:%d]", orte_process_info.nodename,
-             (int) orte_process_info.pid);
+    asprintf(&prefix, "[%s:%d]", orte_proc_info.nodename,
+             (int) orte_proc_info.pid);
 
     if (NULL != error_code) {
         err_msg = ompi_mpi_errnum_get_string(*error_code);
@@ -66,7 +66,7 @@ void mca_btl_base_error_no_nics(const char* transport,
     asprintf(&procid, "%s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
 
     orte_show_help("help-mpi-btl-base.txt", "btl:no-nics",
-                   true, procid, transport, orte_process_info.nodename,
+                   true, procid, transport, orte_proc_info.nodename,
                    nic_name);
     free(procid);
 }
@@ -37,8 +37,8 @@ OMPI_DECLSPEC extern int mca_btl_base_out(const char*, ...);
 
 #define BTL_OUTPUT(args) \
 do { \
-    mca_btl_base_out("[%s]%s[%s:%d:%s] ", \
-                     orte_process_info.nodename, \
+    mca_btl_base_out("[%s]%s[%s:%d:%s] ", \
+                     orte_proc_info.nodename, \
                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
                      __FILE__, __LINE__, __func__); \
     mca_btl_base_out args; \
@@ -48,8 +48,8 @@ do { \
 
 #define BTL_ERROR(args) \
 do { \
-    mca_btl_base_err("[%s]%s[%s:%d:%s] ", \
-                     orte_process_info.nodename, \
+    mca_btl_base_err("[%s]%s[%s:%d:%s] ", \
+                     orte_proc_info.nodename, \
                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
                      __FILE__, __LINE__, __func__); \
     mca_btl_base_err args; \
@@ -58,10 +58,10 @@ do { \
 
 #define BTL_PEER_ERROR(proc, args) \
 do { \
-    mca_btl_base_err("%s[%s:%d:%s] from %s ", \
+    mca_btl_base_err("%s[%s:%d:%s] from %s ", \
                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
                      __FILE__, __LINE__, __func__, \
-                     orte_process_info.nodename); \
+                     orte_proc_info.nodename); \
     if(proc && proc->proc_hostname) { \
         mca_btl_base_err("to: %s ", proc->proc_hostname); \
     } \
@@ -75,7 +75,7 @@ do { \
 do { \
     if(mca_btl_base_verbose > 0) { \
         mca_btl_base_err("[%s]%s[%s:%d:%s] ", \
-                         orte_process_info.nodename, \
+                         orte_proc_info.nodename, \
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
                          __FILE__, __LINE__, __func__); \
         mca_btl_base_err args; \
@@ -72,7 +72,7 @@ static int mca_btl_elan_add_procs( struct mca_btl_base_module_t* btl,
     FILE* file;
     ELAN_BASE* base;
 
-    filename = opal_os_path( false, orte_process_info.proc_session_dir, "ELAN_ID", NULL );
+    filename = opal_os_path( false, orte_proc_info.proc_session_dir, "ELAN_ID", NULL );
     file = fopen( filename, "w" );
     fprintf( file, "%s %d\n", ompi_proc_local_proc->proc_hostname, elan_btl->elan_position );
 
@@ -123,13 +123,13 @@ static void show_init_error(const char *file, int line,
     }
 
         orte_show_help("help-mpi-btl-openib.txt", "init-fail-no-mem",
-                       true, orte_process_info.nodename,
+                       true, orte_proc_info.nodename,
                        file, line, func, dev, str_limit);
 
         if (NULL != str_limit) free(str_limit);
     } else {
         orte_show_help("help-mpi-btl-openib.txt", "init-fail-create-q",
-                       true, orte_process_info.nodename,
+                       true, orte_proc_info.nodename,
                        file, line, func, strerror(errno), errno, dev);
     }
 }
@@ -293,7 +293,7 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
     case IBV_EVENT_SRQ_ERR:
     case IBV_EVENT_PORT_ERR:
         orte_show_help("help-mpi-btl-openib.txt", "of error event",
-                       true,orte_process_info.nodename, orte_process_info.pid,
+                       true,orte_proc_info.nodename, orte_proc_info.pid,
                        event.event_type, openib_event_to_str(event.event_type),
                        xrc_event ? "true" : "false");
         break;
@@ -311,7 +311,7 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
         break;
     default:
         orte_show_help("help-mpi-btl-openib.txt", "of unknown event",
-                       true,orte_process_info.nodename, orte_process_info.pid,
+                       true,orte_proc_info.nodename, orte_proc_info.pid,
                        event.event_type, xrc_event ? "true" : "false");
     }
     ibv_ack_async_event(&event);
@@ -591,7 +591,7 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
         IB_DEFAULT_GID_PREFIX == subnet_id &&
         mca_btl_openib_component.warn_default_gid_prefix) {
         orte_show_help("help-mpi-btl-openib.txt", "default subnet prefix",
-                       true, orte_process_info.nodename);
+                       true, orte_proc_info.nodename);
     }
 
     lmc = (1 << ib_port_attr->lmc);
@@ -949,7 +949,7 @@ static int prepare_device_for_use(mca_btl_openib_device_t *device)
                        "XRC on device without XRC support", true,
                        mca_btl_openib_component.num_xrc_qps,
                        ibv_get_device_name(device->ib_dev),
-                       orte_process_info.nodename);
+                       orte_proc_info.nodename);
         return OMPI_ERROR;
     }
 
@@ -1237,7 +1237,7 @@ static int setup_qps(void)
     if (0 == opal_argv_count(queues)) {
         orte_show_help("help-mpi-btl-openib.txt",
                        "no qps in receive_queues", true,
-                       orte_process_info.nodename,
+                       orte_proc_info.nodename,
                        mca_btl_openib_component.receive_queues);
         ret = OMPI_ERROR;
         goto error;
@@ -1256,7 +1256,7 @@ static int setup_qps(void)
             num_xrc_qps++;
 #else
             orte_show_help("help-mpi-btl-openib.txt", "No XRC support", true,
-                           orte_process_info.nodename,
+                           orte_proc_info.nodename,
                            mca_btl_openib_component.receive_queues);
             ret = OMPI_ERR_NOT_AVAILABLE;
             goto error;
@@ -1264,7 +1264,7 @@ static int setup_qps(void)
         } else {
             orte_show_help("help-mpi-btl-openib.txt",
                            "invalid qp type in receive_queues", true,
-                           orte_process_info.nodename,
+                           orte_proc_info.nodename,
                            mca_btl_openib_component.receive_queues,
                            queues[qp]);
             ret = OMPI_ERR_BAD_PARAM;
@@ -1276,7 +1276,7 @@ static int setup_qps(void)
        and SRQ */
     if (num_xrc_qps > 0 && (num_pp_qps > 0 || num_srq_qps > 0)) {
         orte_show_help("help-mpi-btl-openib.txt", "XRC with PP or SRQ", true,
-                       orte_process_info.nodename,
+                       orte_proc_info.nodename,
                        mca_btl_openib_component.receive_queues);
         ret = OMPI_ERR_BAD_PARAM;
         goto error;
@@ -1285,7 +1285,7 @@ static int setup_qps(void)
     /* Current XRC implementation can't used with btls_per_lid > 1 */
     if (num_xrc_qps > 0 && mca_btl_openib_component.btls_per_lid > 1) {
         orte_show_help("help-mpi-btl-openib.txt", "XRC with BTLs per LID",
-                       true, orte_process_info.nodename,
+                       true, orte_proc_info.nodename,
                        mca_btl_openib_component.receive_queues, num_xrc_qps);
         ret = OMPI_ERR_BAD_PARAM;
         goto error;
@@ -1312,7 +1312,7 @@ static int setup_qps(void)
         if (count < 3 || count > 6) {
             orte_show_help("help-mpi-btl-openib.txt",
                            "invalid pp qp specification", true,
-                           orte_process_info.nodename, queues[qp]);
+                           orte_proc_info.nodename, queues[qp]);
             ret = OMPI_ERR_BAD_PARAM;
             goto error;
         }
@@ -1343,7 +1343,7 @@ static int setup_qps(void)
         if (count < 3 || count > 5) {
             orte_show_help("help-mpi-btl-openib.txt",
                            "invalid srq specification", true,
-                           orte_process_info.nodename, queues[qp]);
+                           orte_proc_info.nodename, queues[qp]);
             ret = OMPI_ERR_BAD_PARAM;
             goto error;
         }
@@ -1367,7 +1367,7 @@ static int setup_qps(void)
 
         if (rd_num <= rd_low) {
             orte_show_help("help-mpi-btl-openib.txt", "rd_num must be > rd_low",
-                           true, orte_process_info.nodename, queues[qp]);
+                           true, orte_proc_info.nodename, queues[qp]);
             ret = OMPI_ERR_BAD_PARAM;
             goto error;
         }
@@ -1388,21 +1388,21 @@ static int setup_qps(void)
     if (max_qp_size < max_size_needed) {
         orte_show_help("help-mpi-btl-openib.txt",
                        "biggest qp size is too small", true,
-                       orte_process_info.nodename, max_qp_size,
+                       orte_proc_info.nodename, max_qp_size,
                        max_size_needed);
         ret = OMPI_ERR_BAD_PARAM;
         goto error;
     } else if (max_qp_size > max_size_needed) {
         orte_show_help("help-mpi-btl-openib.txt",
                        "biggest qp size is too big", true,
-                       orte_process_info.nodename, max_qp_size,
+                       orte_proc_info.nodename, max_qp_size,
                        max_size_needed);
     }
 
     if (mca_btl_openib_component.ib_free_list_max > 0 &&
         min_freelist_size > mca_btl_openib_component.ib_free_list_max) {
         orte_show_help("help-mpi-btl-openib.txt", "freelist too small", true,
-                       orte_process_info.nodename,
+                       orte_proc_info.nodename,
                        mca_btl_openib_component.ib_free_list_max,
                        min_freelist_size);
         ret = OMPI_ERR_BAD_PARAM;
@@ -1487,7 +1487,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
         if (mca_btl_openib_component.warn_no_device_params_found) {
             orte_show_help("help-mpi-btl-openib.txt",
                            "no device params found", true,
-                           orte_process_info.nodename,
+                           orte_proc_info.nodename,
                            ibv_get_device_name(device->ib_dev),
                            device->ib_dev_attr.vendor_id,
                            device->ib_dev_attr.vendor_part_id);
@@ -1593,7 +1593,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
 #endif
     if (NULL == cq) {
         orte_show_help("help-mpi-btl-openib.txt", "init-fail-create-q",
-                       true, orte_process_info.nodename,
+                       true, orte_proc_info.nodename,
                        __FILE__, __LINE__, "ibv_create_cq",
                        strerror(errno), errno,
                        ibv_get_device_name(device->ib_dev));
@@ -1649,7 +1649,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
                      mca_btl_openib_component.receive_queues)) {
             orte_show_help("help-mpi-btl-openib.txt",
                            "conflicting receive_queues", true,
-                           orte_process_info.nodename,
+                           orte_proc_info.nodename,
                            ibv_get_device_name(device->ib_dev),
                            device->ib_dev_attr.vendor_id,
                            device->ib_dev_attr.vendor_part_id,
@@ -1699,7 +1699,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
                        "XRC on device without XRC support", true,
                        mca_btl_openib_component.num_xrc_qps,
                        ibv_get_device_name(device->ib_dev),
-                       orte_process_info.nodename);
+                       orte_proc_info.nodename);
         ret = OMPI_SUCCESS;
         goto error;
     }
@@ -1823,7 +1823,7 @@ error:
     if (OMPI_SUCCESS != ret) {
         orte_show_help("help-mpi-btl-openib.txt",
                        "error in device init", true,
-                       orte_process_info.nodename,
+                       orte_proc_info.nodename,
                        ibv_get_device_name(device->ib_dev));
     }
 
@@ -2086,7 +2086,7 @@ btl_openib_component_init(int *num_btl_modules,
         ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) {
         orte_show_help("help-mpi-btl-openib.txt",
                        "ptmalloc2 with no threads", true,
-                       orte_process_info.nodename);
+                       orte_proc_info.nodename);
         goto no_btls;
     }
 #endif
@@ -2204,7 +2204,7 @@ btl_openib_component_init(int *num_btl_modules,
         if (mca_btl_openib_component.want_fork_support > 0) {
             orte_show_help("help-mpi-btl-openib.txt",
                            "ibv_fork_init fail", true,
-                           orte_process_info.nodename);
+                           orte_proc_info.nodename);
             goto no_btls;
         }
     }
@@ -2313,7 +2313,7 @@ btl_openib_component_init(int *num_btl_modules,
     free(dev_sorted);
     if (!found) {
         orte_show_help("help-mpi-btl-openib.txt", "no devices right type",
-                       true, orte_process_info.nodename,
+                       true, orte_proc_info.nodename,
                        ((BTL_OPENIB_DT_IB == mca_btl_openib_component.device_type) ?
                         "InfiniBand" :
                         (BTL_OPENIB_DT_IWARP == mca_btl_openib_component.device_type) ?
@@ -2330,7 +2330,7 @@ btl_openib_component_init(int *num_btl_modules,
         mca_btl_openib_component.warn_nonexistent_if) {
         char *str = opal_argv_join(mca_btl_openib_component.if_list, ',');
         orte_show_help("help-mpi-btl-openib.txt", "nonexistent port",
-                       true, orte_process_info.nodename,
+                       true, orte_proc_info.nodename,
                        ((NULL != mca_btl_openib_component.if_include) ?
                         "in" : "ex"), str);
         free(str);
@@ -2338,7 +2338,7 @@ btl_openib_component_init(int *num_btl_modules,
 
     if(0 == mca_btl_openib_component.ib_num_btls) {
         orte_show_help("help-mpi-btl-openib.txt",
-                       "no active ports found", true, orte_process_info.nodename);
+                       "no active ports found", true, orte_proc_info.nodename);
         goto no_btls;
     }
 
@@ -2385,7 +2385,7 @@ btl_openib_component_init(int *num_btl_modules,
         if (OMPI_SUCCESS != ret) {
             orte_show_help("help-mpi-btl-openib.txt",
                            "error in device init", true,
-                           orte_process_info.nodename,
+                           orte_proc_info.nodename,
                            ibv_get_device_name(device->ib_dev));
             goto no_btls;
         }
@@ -2924,24 +2924,24 @@ error:
                            BTL_OPENIB_QP_TYPE_PP(qp) ?
                            "pp rnr retry exceeded" :
                            "srq rnr retry exceeded", true,
-                           orte_process_info.nodename, device_name,
+                           orte_proc_info.nodename, device_name,
                            peer_hostname);
             orte_notifier.help(ORTE_NOTIFIER_INFRA, ORTE_ERR_COMM_FAILURE,
                                "help-mpi-btl-openib.txt",
                                BTL_OPENIB_QP_TYPE_PP(qp) ?
                                "pp rnr retry exceeded" :
                                "srq rnr retry exceeded",
-                               orte_process_info.nodename, device_name,
+                               orte_proc_info.nodename, device_name,
                                peer_hostname);
         } else if (IBV_WC_RETRY_EXC_ERR == wc->status) {
             orte_show_help("help-mpi-btl-openib.txt",
                            "pp retry exceeded", true,
-                           orte_process_info.nodename,
+                           orte_proc_info.nodename,
                            device_name, peer_hostname);
             orte_notifier.help(ORTE_NOTIFIER_INFRA, ORTE_ERR_COMM_FAILURE,
                                "help-mpi-btl-openib.txt",
                                "pp retry exceeded",
-                               orte_process_info.nodename,
+                               orte_proc_info.nodename,
                                device_name, peer_hostname);
         }
     }
@@ -1087,7 +1087,7 @@ void *mca_btl_openib_endpoint_invoke_error(void *context)
     if (NULL == btl || NULL == btl->error_cb) {
         orte_show_help("help-mpi-btl-openib.txt",
                        "cannot raise btl error", true,
-                       orte_process_info.nodename,
+                       orte_proc_info.nodename,
                        __FILE__, __LINE__);
         exit(1);
     }
@@ -178,7 +178,7 @@ int btl_openib_register_mca_params(void)
         if (0 != ival) {
             orte_show_help("help-mpi-btl-openib.txt",
                            "ibv_fork requested but not supported", true,
-                           orte_process_info.nodename);
+                           orte_proc_info.nodename);
             return OMPI_ERROR;
         }
     }
@@ -208,7 +208,7 @@ int btl_openib_register_mca_params(void)
         } else {
             orte_show_help("help-mpi-btl-openib.txt",
                            "ibv_fork requested but not supported", true,
-                           orte_process_info.nodename);
+                           orte_proc_info.nodename);
             return OMPI_ERROR;
         }
         free(str);
@@ -458,7 +458,7 @@ int btl_openib_register_mca_params(void)
                    64, &ival, REGINT_GE_ZERO));
     if(ival <= 1 || (ival & (ival - 1))) {
         orte_show_help("help-mpi-btl-openib.txt", "wrong buffer alignment",
-                       true, ival, orte_process_info.nodename, 64);
+                       true, ival, orte_proc_info.nodename, 64);
         mca_btl_openib_component.buffer_alignment = 64;
     } else {
         mca_btl_openib_component.buffer_alignment = (uint32_t) ival;
@@ -42,7 +42,7 @@ int mca_btl_openib_open_xrc_domain(struct mca_btl_openib_device_t *device)
     dev_name = ibv_get_device_name(device->ib_dev);
     len = asprintf(&xrc_file_name,
                    "%s"OPAL_PATH_SEP"openib_xrc_domain_%s",
-                   orte_process_info.job_session_dir, dev_name);
+                   orte_proc_info.job_session_dir, dev_name);
     if (0 > len) {
         BTL_ERROR(("Failed to allocate memomry for XRC file name\n",
                    strerror(errno)));
@@ -121,7 +121,7 @@ int ompi_btl_openib_connect_base_register(void)
             if (NULL == all[i]) {
                 orte_show_help("help-mpi-btl-openib-cpc-base.txt",
                                "cpc name not found", true,
-                               "include", orte_process_info.nodename,
+                               "include", orte_proc_info.nodename,
                                "include", cpc_include, temp[j],
                                all_cpc_names);
                 opal_argv_free(temp);
@@ -147,7 +147,7 @@ int ompi_btl_openib_connect_base_register(void)
             if (NULL == all[i]) {
                 orte_show_help("help-mpi-btl-openib-cpc-base.txt",
                                "cpc name not found", true,
-                               "exclude", orte_process_info.nodename,
+                               "exclude", orte_proc_info.nodename,
                                "exclude", cpc_exclude, temp[j],
                                all_cpc_names);
                 opal_argv_free(temp);
@@ -292,7 +292,7 @@ int ompi_btl_openib_connect_base_select_for_local_port(mca_btl_openib_module_t *
     if (0 == cpc_index) {
         orte_show_help("help-mpi-btl-openib-cpc-base.txt",
                        "no cpcs for port", true,
-                       orte_process_info.nodename,
+                       orte_proc_info.nodename,
                        ibv_get_device_name(btl->device->ib_dev),
                        msg);
         free(cpcs);
@@ -923,7 +923,7 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
     if (init_attr.cap.max_inline_data < req_inline) {
         endpoint->qps[qp].ib_inline_max = init_attr.cap.max_inline_data;
         orte_show_help("help-mpi-btl-openib-cpc-base.txt",
-                       "inline truncated", orte_process_info.nodename,
+                       "inline truncated", orte_proc_info.nodename,
                        ibv_get_device_name(openib_btl->device->ib_dev),
                        req_inline, init_attr.cap.max_inline_data);
     } else {
@@ -2314,7 +2314,7 @@ static int request_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
     if (IBV_WC_RESP_TIMEOUT_ERR != event->param.send_status) {
         orte_show_help("help-mpi-btl-openib-cpc-ibcm.txt",
                        "unhandled error", true,
-                       "request", orte_process_info.nodename,
+                       "request", orte_proc_info.nodename,
                        event->param.send_status);
     } else {
         ibcm_request_t *req;
@@ -2325,7 +2325,7 @@ static int request_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
         if (NULL == req) {
             orte_show_help("help-mpi-btl-openib-cpc-ibcm.txt",
                            "timeout not found", true,
-                           "request", orte_process_info.nodename);
+                           "request", orte_proc_info.nodename);
         } else {
             endpoint = req->endpoint;
         }
@@ -2346,7 +2346,7 @@ static int reply_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
     if (IBV_WC_RESP_TIMEOUT_ERR != event->param.send_status) {
         orte_show_help("help-mpi-btl-openib-cpc-ibcm.txt",
                        "unhandled error", true,
-                       "reply", orte_process_info.nodename,
+                       "reply", orte_proc_info.nodename,
                        event->param.send_status);
     } else {
         ibcm_reply_t *rep;
@@ -2357,7 +2357,7 @@ static int reply_error(ibcm_listen_cm_id_t *cmh, struct ib_cm_event *event)
         if (NULL == rep) {
             orte_show_help("help-mpi-btl-openib-cpc-ibcm.txt",
                            "timeout not found", true,
-                           "reply", orte_process_info.nodename);
+                           "reply", orte_proc_info.nodename);
         } else {
             endpoint = rep->endpoint;
         }
@@ -465,7 +465,7 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
     if (init_attr.cap.max_inline_data < req_inline) {
         endpoint->qps[qp].ib_inline_max = init_attr.cap.max_inline_data;
         orte_show_help("help-mpi-btl-openib-cpc-base.txt",
-                       "inline truncated", true, orte_process_info.nodename,
+                       "inline truncated", true, orte_proc_info.nodename,
                        ibv_get_device_name(openib_btl->device->ib_dev),
                        req_inline, init_attr.cap.max_inline_data);
     } else {
@@ -426,7 +426,7 @@ static int rdmacm_setup_qp(rdmacm_contents_t *contents,
         endpoint->qps[qpnum].ib_inline_max = attr.cap.max_inline_data;
         orte_show_help("help-mpi-btl-openib-cpc-base.txt",
                        "inline truncated", true,
-                       orte_process_info.nodename,
+                       orte_proc_info.nodename,
                        ibv_get_device_name(contents->openib_btl->device->ib_dev),
                        req_inline, attr.cap.max_inline_data);
     } else {
@@ -722,14 +722,14 @@ static void *show_help_cant_find_endpoint(void *context)
         msg = stringify(c->peer_ip_addr);
         orte_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
                        "could not find matching endpoint", true,
-                       orte_process_info.nodename,
+                       orte_proc_info.nodename,
                        c->device_name,
                        c->peer_tcp_port);
         free(msg);
     } else {
         orte_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
                        "could not find matching endpoint", true,
-                       orte_process_info.nodename,
+                       orte_proc_info.nodename,
                        "<unknown>", "<unknown>", -1);
     }
     free(context);
@@ -1421,7 +1421,7 @@ static void *show_help_rdmacm_event_error(void *c)
     if (RDMA_CM_EVENT_DEVICE_REMOVAL == event->event) {
         orte_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
                        "rdma cm device removal", true,
-                       orte_process_info.nodename,
+                       orte_proc_info.nodename,
                        ibv_get_device_name(event->id->verbs->device));
     } else {
         const char *device = "Unknown";
@@ -1432,7 +1432,7 @@ static void *show_help_rdmacm_event_error(void *c)
         }
         orte_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
                        "rdma cm event error", true,
-                       orte_process_info.nodename,
+                       orte_proc_info.nodename,
                        device,
                        rdma_event_str(event->event),
                        context->endpoint->endpoint_proc->proc_ompi->proc_hostname);
@@ -411,7 +411,7 @@ static int xoob_send_qp_create (mca_btl_base_endpoint_t* endpoint)
     if (qp_init_attr.cap.max_inline_data < req_inline) {
         endpoint->qps[0].ib_inline_max = qp_init_attr.cap.max_inline_data;
         orte_show_help("help-mpi-btl-openib-cpc-base.txt",
-                       "inline truncated", orte_process_info.nodename,
+                       "inline truncated", orte_proc_info.nodename,
                        ibv_get_device_name(openib_btl->device->ib_dev),
                        req_inline, qp_init_attr.cap.max_inline_data);
     } else {
@@ -233,8 +233,8 @@ static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n)
 
     /* set file name */
     if(asprintf(&sm_ctl_file, "%s"OPAL_PATH_SEP"shared_mem_btl_module.%s",
-                orte_process_info.job_session_dir,
-                orte_process_info.nodename) < 0)
+                orte_proc_info.job_session_dir,
+                orte_proc_info.nodename) < 0)
         return OMPI_ERR_OUT_OF_RESOURCE;
 
     /* Pass in a data segment alignment of 0 to get no data
@@ -371,7 +371,7 @@ create_sm_endpoint(int local_proc, struct ompi_proc_t *proc)
     OBJ_CONSTRUCT(&ep->pending_sends, opal_list_t);
 #if OMPI_ENABLE_PROGRESS_THREADS == 1
     sprintf(path, "%s"OPAL_PATH_SEP"sm_fifo.%lu",
-            orte_process_info.job_session_dir,
+            orte_proc_info.job_session_dir,
             (unsigned long)proc->proc_name.vpid);
     ep->fifo_fd = open(path, O_WRONLY);
     if(ep->fifo_fd < 0) {
@@ -848,7 +848,7 @@ int mca_btl_sm_ft_event(int state) {
             opal_crs_base_metadata_write_token(NULL, CRS_METADATA_TOUCH, mca_btl_sm_component.mmap_file->map_path);
 
             /* Record the job session directory */
-            opal_crs_base_metadata_write_token(NULL, CRS_METADATA_MKDIR, orte_process_info.job_session_dir);
+            opal_crs_base_metadata_write_token(NULL, CRS_METADATA_MKDIR, orte_proc_info.job_session_dir);
         }
     }
     else if(OPAL_CRS_CONTINUE == state) {
@@ -868,7 +868,7 @@ int mca_btl_sm_ft_event(int state) {
             OPAL_CRS_RESTART_PRE == state) {
         if( NULL != mca_btl_sm_component.mmap_file ) {
             /* Add session directory */
-            opal_crs_base_cleanup_append(orte_process_info.job_session_dir, true);
+            opal_crs_base_cleanup_append(orte_proc_info.job_session_dir, true);
             /* Add shared memory file */
             opal_crs_base_cleanup_append(mca_btl_sm_component.mmap_file->map_path, false);
         }
@@ -272,7 +272,7 @@ mca_btl_base_module_t** mca_btl_sm_component_init(
 #if OMPI_ENABLE_PROGRESS_THREADS == 1
     /* create a named pipe to receive events */
     sprintf( mca_btl_sm_component.sm_fifo_path,
-             "%s"OPAL_PATH_SEP"sm_fifo.%lu", orte_process_info.job_session_dir,
+             "%s"OPAL_PATH_SEP"sm_fifo.%lu", orte_proc_info.job_session_dir,
             (unsigned long)ORTE_PROC_MY_NAME->vpid );
     if(mkfifo(mca_btl_sm_component.sm_fifo_path, 0660) < 0) {
         opal_output(0, "mca_btl_sm_component_init: mkfifo failed with errno=%d\n",errno);
@@ -803,7 +803,7 @@ static int mca_btl_udapl_assign_netmask(mca_btl_udapl_module_t* udapl_btl)
 
         BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
             ("help-mpi-btl-udapl.txt", "interface not found",
-            true, orte_process_info.nodename, btl_addr_string));
+            true, orte_proc_info.nodename, btl_addr_string));
 
         return OMPI_ERROR;
     }
@@ -817,7 +817,7 @@ static int mca_btl_udapl_assign_netmask(mca_btl_udapl_module_t* udapl_btl)
 
         BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
             ("help-mpi-btl-udapl.txt", "netmask not found",
-            true, orte_process_info.nodename, btl_addr_string));
+            true, orte_proc_info.nodename, btl_addr_string));
 
         return OMPI_ERROR;
     }
@@ -831,7 +831,7 @@ static int mca_btl_udapl_assign_netmask(mca_btl_udapl_module_t* udapl_btl)
         /* current uDAPL BTL does not support IPv6 */
         BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
             ("help-mpi-btl-udapl.txt", "IPv4 only",
-            true, orte_process_info.nodename));
+            true, orte_proc_info.nodename));
 
         return OMPI_ERROR;
     }
@@ -418,7 +418,7 @@ static int mca_btl_udapl_modify_ia_list(DAT_COUNT *num_info_entries,
         char *str = opal_argv_join(mca_btl_udapl_component.if_list, ',');
         BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
             ("help-mpi-btl-udapl.txt", "nonexistent entry",
-            true, orte_process_info.nodename,
+            true, orte_proc_info.nodename,
             ((NULL != mca_btl_udapl_component.if_include) ?
             "in" : "ex"), str));
         free(str);
@@ -253,14 +253,14 @@ static int mca_btl_udapl_proc_address_match(
         /* current uDAPL BTL only supports IPv4 */
         BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
             ("help-mpi-btl-udapl.txt", "IPv4 only",
-            true, orte_process_info.nodename));
+            true, orte_proc_info.nodename));
         return OMPI_ERROR;
     }
 
     if (MCA_BTL_UDAPL_INVALID_PEER_ADDR_IDX == *peer_addr_idx) {
         BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
             ("help-mpi-btl-udapl.txt", "no network match",
-            true, btl_addr_string, orte_process_info.nodename,
+            true, btl_addr_string, orte_proc_info.nodename,
             peer_proc->proc_ompi->proc_hostname));
         return OMPI_ERR_OUT_OF_RESOURCE;
     }
@@ -518,8 +518,8 @@ static int bootstrap_init(void)
     if (NULL == mca_coll_sm_component.sm_bootstrap_filename) {
         return OMPI_ERROR;
     }
-    orte_proc_info();
-    fullpath = opal_os_path( false, orte_process_info.job_session_dir,
+    orte_proc_info_init();
+    fullpath = opal_os_path( false, orte_proc_info.job_session_dir,
                              mca_coll_sm_component.sm_bootstrap_filename, NULL );
     if (NULL == fullpath) {
         return OMPI_ERR_OUT_OF_RESOURCE;
@@ -233,7 +233,7 @@ static int allocate_shared_file(size_t size, char **file_name,
      */
     unique_comm_id=(int)getpid();
     len=asprintf(&f_name,
-            "%s"OPAL_PATH_SEP"sm_coll_v2_%0d_%0d",orte_process_info.job_session_dir,
+            "%s"OPAL_PATH_SEP"sm_coll_v2_%0d_%0d",orte_proc_info.job_session_dir,
             ompi_comm_get_cid(comm),unique_comm_id);
     if( 0 > len ) {
         return OMPI_ERROR;
@@ -318,7 +318,7 @@ static int allocate_shared_file(size_t size, char **file_name,
      * communicators, that could have the same communicator id
      */
     len=asprintf(&f_name,
-            "%s"OPAL_PATH_SEP"sm_coll_v2_%0d_%0d",orte_process_info.job_session_dir,
+            "%s"OPAL_PATH_SEP"sm_coll_v2_%0d_%0d",orte_proc_info.job_session_dir,
             ompi_comm_get_cid(comm),unique_comm_id);
     if( 0 > len ) {
         return OMPI_ERROR;
@@ -987,8 +987,8 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
     /* set file name */
     /*
     len=asprintf(&(sm_module->coll_sm2_file_name),
-            "%s"OPAL_PATH_SEP"sm_coll_v2%s_%0d\0",orte_process_info.job_session_dir,
-            orte_process_info.nodename,ompi_comm_get_cid(comm));
+            "%s"OPAL_PATH_SEP"sm_coll_v2%s_%0d\0",orte_proc_info.job_session_dir,
+            orte_proc_info.nodename,ompi_comm_get_cid(comm));
     if( 0 > len ) {
         goto CLEANUP;
     }
@@ -177,7 +177,7 @@ int mca_coll_sync_module_enable(mca_coll_base_module_t *module,
         return OMPI_SUCCESS;
     } else {
         orte_show_help("help-coll-sync.txt", "missing collective", true,
-                       orte_process_info.nodename,
+                       orte_proc_info.nodename,
                        mca_coll_sync_component.priority, msg);
         return OMPI_ERR_NOT_FOUND;
     }
@@ -6470,10 +6470,10 @@ static void traffic_message_dump_msg_list(opal_list_t *msg_list, bool is_drain)
 
 static void traffic_message_dump_peer(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref, char * msg, bool root_only)
 {
-    if( root_only && orte_process_info.my_name.vpid != 0 ) {
+    if( root_only && orte_proc_info.my_name.vpid != 0 ) {
         return;
     } else {
-        sleep(orte_process_info.my_name.vpid * 2);
+        sleep(orte_proc_info.my_name.vpid * 2);
    }
 
     opal_output(0, "------------- %s ---------------------------------", msg);
@@ -770,7 +770,7 @@ static int open_port(char *port_name, orte_rml_tag_t given_tag)
 
     OPAL_THREAD_LOCK(&ompi_dpm_port_mutex);
 
-    if (NULL == orte_process_info.my_hnp_uri) {
+    if (NULL == orte_proc_info.my_hnp_uri) {
         rc = ORTE_ERR_NOT_AVAILABLE;
         ORTE_ERROR_LOG(rc);
         goto cleanup;
@@ -790,7 +790,7 @@ static int open_port(char *port_name, orte_rml_tag_t given_tag)
     }
 
 
-    len = strlen(orte_process_info.my_hnp_uri) + strlen(rml_uri) + strlen(tag);
+    len = strlen(orte_proc_info.my_hnp_uri) + strlen(rml_uri) + strlen(tag);
 
     /* if the overall port name is too long, we abort */
     if (len > (MPI_MAX_PORT_NAME-1)) {
@@ -799,7 +799,7 @@ static int open_port(char *port_name, orte_rml_tag_t given_tag)
     }
 
     /* assemble the port name */
-    snprintf(port_name, MPI_MAX_PORT_NAME, "%s+%s:%s", orte_process_info.my_hnp_uri, rml_uri, tag);
+    snprintf(port_name, MPI_MAX_PORT_NAME, "%s+%s:%s", orte_proc_info.my_hnp_uri, rml_uri, tag);
     rc = OMPI_SUCCESS;
 
 cleanup:
@@ -137,7 +137,7 @@ mca_mpool_base_module_t* mca_mpool_base_module_create(
     } else {
         orte_show_help("help-mpool-base.txt", "leave pinned failed",
                        true, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
-                       orte_process_info.nodename);
+                       orte_proc_info.nodename);
         return NULL;
     }
 
 
@@ -173,14 +173,14 @@ void mca_mpool_base_tree_print(void)
             ompi_debug_show_mpi_alloc_mem_leaks < 0) {
             orte_show_help("help-mpool-base.txt", "all mem leaks",
                            true, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
-                           orte_process_info.nodename,
-                           orte_process_info.pid, leak_msg);
+                           orte_proc_info.nodename,
+                           orte_proc_info.pid, leak_msg);
         } else {
             int i = num_leaks - ompi_debug_show_mpi_alloc_mem_leaks;
             orte_show_help("help-mpool-base.txt", "some mem leaks",
                            true, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
-                           orte_process_info.nodename,
-                           orte_process_info.pid, leak_msg, i,
+                           orte_proc_info.nodename,
+                           orte_proc_info.pid, leak_msg, i,
                            (i > 1) ? "s were" : " was",
                            (i > 1) ? "are" : "is");
         }
@@ -207,8 +207,8 @@ static mca_mpool_base_module_t* mca_mpool_sm_init(
 
     /* create initial shared memory mapping */
     len = asprintf( &file_name, "%s"OPAL_PATH_SEP"shared_mem_pool.%s",
-                    orte_process_info.job_session_dir,
-                    orte_process_info.nodename );
+                    orte_proc_info.job_session_dir,
+                    orte_proc_info.nodename );
     if ( 0 > len ) {
         free(mpool_module);
         return NULL;
@@ -131,8 +131,8 @@ int mca_mpool_sm_ft_event(int state) {
     if(OPAL_CRS_CHECKPOINT == state) {
         /* Record the shared memory filename */
         asprintf( &file_name, "%s"OPAL_PATH_SEP"shared_mem_pool.%s",
-                  orte_process_info.job_session_dir,
-                  orte_process_info.nodename );
+                  orte_proc_info.job_session_dir,
+                  orte_proc_info.nodename );
         opal_crs_base_metadata_write_token(NULL, CRS_METADATA_TOUCH, file_name);
         free(file_name);
         file_name = NULL;
@@ -141,7 +141,7 @@ int vprotocol_pessimist_sender_based_init(const char *mmapfile, size_t size)
     OBJ_CONSTRUCT(&sb.sb_sendreq, opal_list_t);
 #endif
 
-    asprintf(&path, "%s"OPAL_PATH_SEP"%s", orte_process_info.proc_session_dir,
+    asprintf(&path, "%s"OPAL_PATH_SEP"%s", orte_proc_info.proc_session_dir,
              mmapfile);
     if(OPAL_SUCCESS != sb_mmap_file_open(path))
         return OPAL_ERR_FILE_OPEN_FAILURE;
@@ -63,7 +63,7 @@ void ompi_proc_construct(ompi_proc_t* proc)
      * the arch of the remote nodes, we will have to set the convertors to the correct
      * architecture.
      */
-    proc->proc_arch = orte_process_info.arch;
+    proc->proc_arch = orte_proc_info.arch;
     proc->proc_convertor = ompi_mpi_local_convertor;
     OBJ_RETAIN( ompi_mpi_local_convertor );
 
@@ -99,7 +99,7 @@ int ompi_proc_init(void)
     OBJ_CONSTRUCT(&ompi_proc_lock, opal_mutex_t);
 
     /* create proc structures and find self */
-    for( i = 0; i < orte_process_info.num_procs; i++ ) {
+    for( i = 0; i < orte_proc_info.num_procs; i++ ) {
         ompi_proc_t *proc = OBJ_NEW(ompi_proc_t);
         opal_list_append(&ompi_proc_list, (opal_list_item_t*)proc);
 
@@ -108,8 +108,8 @@ int ompi_proc_init(void)
         if (i == ORTE_PROC_MY_NAME->vpid) {
             ompi_proc_local_proc = proc;
             proc->proc_flags = OPAL_PROC_ALL_LOCAL;
-            proc->proc_hostname = orte_process_info.nodename;
-            proc->proc_arch = orte_process_info.arch;
+            proc->proc_hostname = orte_proc_info.nodename;
+            proc->proc_arch = orte_proc_info.arch;
         } else {
             /* get the locality information */
             proc->proc_flags = orte_ess.proc_get_locality(&proc->proc_name);
@@ -146,14 +146,14 @@ int ompi_proc_set_arch(void)
         if (proc->proc_name.vpid != ORTE_PROC_MY_NAME->vpid) {
             proc->proc_arch = orte_ess.proc_get_arch(&proc->proc_name);
             /* if arch is different than mine, create a new convertor for this proc */
-            if (proc->proc_arch != orte_process_info.arch) {
+            if (proc->proc_arch != orte_proc_info.arch) {
 #if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
                 OBJ_RELEASE(proc->proc_convertor);
                 proc->proc_convertor = ompi_convertor_create(proc->proc_arch, 0);
 #else
                 orte_show_help("help-mpi-runtime",
                                "heterogeneous-support-unavailable",
-                               true, orte_process_info.nodename,
+                               true, orte_proc_info.nodename,
                                proc->proc_hostname == NULL ? "<hostname unavailable>" :
                                proc->proc_hostname);
                 OPAL_THREAD_UNLOCK(&ompi_proc_lock);
@@ -353,21 +353,21 @@ int ompi_proc_refresh(void) {
         if (i == ORTE_PROC_MY_NAME->vpid) {
             ompi_proc_local_proc = proc;
             proc->proc_flags = OPAL_PROC_ALL_LOCAL;
-            proc->proc_hostname = orte_process_info.nodename;
-            proc->proc_arch = orte_process_info.arch;
+            proc->proc_hostname = orte_proc_info.nodename;
+            proc->proc_arch = orte_proc_info.arch;
         } else {
             proc->proc_flags = orte_ess.proc_get_locality(&proc->proc_name);
             proc->proc_hostname = orte_ess.proc_get_hostname(&proc->proc_name);
             proc->proc_arch = orte_ess.proc_get_arch(&proc->proc_name);
             /* if arch is different than mine, create a new convertor for this proc */
-            if (proc->proc_arch != orte_process_info.arch) {
+            if (proc->proc_arch != orte_proc_info.arch) {
 #if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
                 OBJ_RELEASE(proc->proc_convertor);
                 proc->proc_convertor = ompi_convertor_create(proc->proc_arch, 0);
 #else
                 orte_show_help("help-mpi-runtime",
                                "heterogeneous-support-unavailable",
-                               true, orte_process_info.nodename,
+                               true, orte_proc_info.nodename,
                                proc->proc_hostname == NULL ? "<hostname unavailable>" :
                                proc->proc_hostname);
                 OPAL_THREAD_UNLOCK(&ompi_proc_lock);
@@ -539,7 +539,7 @@ ompi_proc_unpack(opal_buffer_t* buf,
 #else
             orte_show_help("help-mpi-runtime",
                            "heterogeneous-support-unavailable",
-                           true, orte_process_info.nodename,
+                           true, orte_proc_info.nodename,
                            new_hostname == NULL ? "<hostname unavailable>" :
                            new_hostname);
             free(plist);
@@ -68,7 +68,7 @@ ompi_mpi_abort(struct ompi_communicator_t* comm,
        gethostname. */
 
     if (orte_initialized) {
-        host = orte_process_info.nodename;
+        host = orte_proc_info.nodename;
     } else {
         gethostname(hostname, sizeof(hostname));
         host = hostname;
@@ -135,7 +135,7 @@ static void warn_fork_cb(void)
 {
     if (ompi_mpi_initialized && !ompi_mpi_finalized && !fork_warning_issued) {
         orte_show_help("help-mpi-runtime.txt", "mpi_init:warn-fork", true,
-                       orte_process_info.nodename, getpid(),
+                       orte_proc_info.nodename, getpid(),
                        ompi_mpi_comm_world.comm.c_my_rank);
         fork_warning_issued = true;
     }
@@ -341,7 +341,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
     }
 
     /* Setup ORTE - note that we are not a tool */
-    orte_process_info.mpi_proc = true;
+    orte_proc_info.mpi_proc = true;
     if (ORTE_SUCCESS != (ret = orte_init(ORTE_NON_TOOL))) {
         error = "ompi_mpi_init: orte_init failed";
         goto error;
@@ -698,7 +698,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
     if (ompi_mpi_show_mca_params) {
         ompi_show_all_mca_params(ompi_mpi_comm_world.comm.c_my_rank,
                                  nprocs,
-                                 orte_process_info.nodename);
+                                 orte_proc_info.nodename);
     }
 
     /* Do we need to wait for a debugger? */
@@ -275,9 +275,9 @@ void ompi_info::open_components()
     component_map["installdirs"] = &opal_installdirs_components;
 
     // ORTE frameworks
-    // Set orte_process_info.hnp to true to force all frameworks to
+    // Set orte_proc_info.hnp to true to force all frameworks to
     // open components
-    orte_process_info.hnp = true;
+    orte_proc_info.hnp = true;
 
     if (ORTE_SUCCESS != orte_errmgr_base_open()) {
         goto error;
@@ -94,7 +94,7 @@ int orte_errmgr_default_component_close(void)
 int orte_errmgr_default_component_query(mca_base_module_t **module, int *priority)
 {
     /* If we are not an HNP, then don't pick us! */
-    if (!orte_process_info.hnp) {
+    if (!orte_proc_info.hnp) {
         /* don't take me! */
         *module = NULL;
         return ORTE_ERROR;
@@ -85,13 +85,13 @@ static int rte_init(char flags)
     /* if I am a daemon, complete my setup using the
      * default procedure
      */
-    if (orte_process_info.daemon) {
+    if (orte_proc_info.daemon) {
         if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) {
             ORTE_ERROR_LOG(ret);
             error = "orte_ess_base_orted_setup";
             goto error;
         }
-    } else if (orte_process_info.tool) {
+    } else if (orte_proc_info.tool) {
         /* otherwise, if I am a tool proc, use that procedure */
         if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
             ORTE_ERROR_LOG(ret);
@@ -112,7 +112,7 @@ static int rte_init(char flags)
     }
 
     /* setup the nidmap arrays */
-    if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_process_info.sync_buf))) {
+    if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_proc_info.sync_buf))) {
         ORTE_ERROR_LOG(ret);
         error = "orte_util_nidmap_init";
         goto error;
@@ -133,11 +133,11 @@ static int rte_finalize(void)
     int ret;
 
     /* if I am a daemon, finalize using the default procedure */
-    if (orte_process_info.daemon) {
+    if (orte_proc_info.daemon) {
         if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
             ORTE_ERROR_LOG(ret);
         }
-    } else if (orte_process_info.tool) {
+    } else if (orte_proc_info.tool) {
         /* otherwise, if I am a tool proc, use that procedure */
         if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) {
             ORTE_ERROR_LOG(ret);
@@ -357,7 +357,7 @@ static int alps_set_name(void)
     OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
                          "ess:alps set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 
-    orte_process_info.num_procs = (orte_std_cntr_t) cnos_get_size();
+    orte_proc_info.num_procs = (orte_std_cntr_t) cnos_get_size();
 
     return ORTE_SUCCESS;
 }
@@ -45,7 +45,7 @@ int orte_ess_env_get(void)
         ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
         return ORTE_ERR_NOT_FOUND;
     }
-    orte_process_info.num_procs = (orte_std_cntr_t)num_procs;
+    orte_proc_info.num_procs = (orte_std_cntr_t)num_procs;
 
     return ORTE_SUCCESS;
 }
@@ -123,12 +123,12 @@ int orte_ess_base_app_setup(void)
     OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
                          "%s setting up session dir with\n\ttmpdir: %s\n\thost %s",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
-                         (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base,
-                         orte_process_info.nodename));
+                         (NULL == orte_proc_info.tmpdir_base) ? "UNDEF" : orte_proc_info.tmpdir_base,
+                         orte_proc_info.nodename));
 
     if (ORTE_SUCCESS != (ret = orte_session_dir(true,
-                                                orte_process_info.tmpdir_base,
-                                                orte_process_info.nodename, NULL,
+                                                orte_proc_info.tmpdir_base,
+                                                orte_proc_info.nodename, NULL,
                                                 ORTE_PROC_MY_NAME))) {
         ORTE_ERROR_LOG(ret);
         error = "orte_session_dir";
@@ -138,7 +138,7 @@ int orte_ess_base_app_setup(void)
     /* Once the session directory location has been established, set
        the opal_output env file location to be in the
       proc-specific session directory. */
-    opal_output_set_output_file_info(orte_process_info.proc_session_dir,
+    opal_output_set_output_file_info(orte_proc_info.proc_session_dir,
                                      "output-", NULL, NULL);
 
 
@@ -164,7 +164,7 @@ int orte_ess_base_app_setup(void)
         error = "orte_snapc_base_open";
         goto error;
     }
-    if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_process_info.hnp, !orte_process_info.daemon))) {
+    if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_proc_info.hnp, !orte_proc_info.daemon))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_snapc_base_select";
        goto error;
@@ -278,7 +278,7 @@ void orte_ess_base_app_abort(int status, bool report)
     * write an "abort" file into our session directory
     */
    if (report) {
-        abort_file = opal_os_path(false, orte_process_info.proc_session_dir, "abort", NULL);
+        abort_file = opal_os_path(false, orte_proc_info.proc_session_dir, "abort", NULL);
        if (NULL == abort_file) {
            /* got a problem */
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
@@ -187,12 +187,12 @@ int orte_ess_base_orted_setup(void)
     OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
                          "%s setting up session dir with\n\ttmpdir: %s\n\thost %s",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
-                         (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base,
-                         orte_process_info.nodename));
+                         (NULL == orte_proc_info.tmpdir_base) ? "UNDEF" : orte_proc_info.tmpdir_base,
+                         orte_proc_info.nodename));
 
     if (ORTE_SUCCESS != (ret = orte_session_dir(true,
-                                                orte_process_info.tmpdir_base,
-                                                orte_process_info.nodename, NULL,
+                                                orte_proc_info.tmpdir_base,
+                                                orte_proc_info.nodename, NULL,
                                                 ORTE_PROC_MY_NAME))) {
         ORTE_ERROR_LOG(ret);
         error = "orte_session_dir";
@@ -243,7 +243,7 @@ int orte_ess_base_orted_setup(void)
         goto error;
     }
 
-    if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_process_info.hnp, !orte_process_info.daemon))) {
+    if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_proc_info.hnp, !orte_proc_info.daemon))) {
         ORTE_ERROR_LOG(ret);
         error = "orte_snapc_base_select";
         goto error;
@@ -98,9 +98,9 @@ int orte_ess_base_tool_setup(void)
     * this node might be located
     */
    if (ORTE_SUCCESS != (ret = orte_session_dir_get_name(NULL,
-                                                         &orte_process_info.tmpdir_base,
-                                                         &orte_process_info.top_session_dir,
-                                                         orte_process_info.nodename, NULL, NULL))) {
+                                                         &orte_proc_info.tmpdir_base,
+                                                         &orte_proc_info.top_session_dir,
+                                                         orte_proc_info.nodename, NULL, NULL))) {
        ORTE_ERROR_LOG(ret);
        error = "define session dir names";
        goto error;
@@ -136,7 +136,7 @@ int orte_ess_base_tool_setup(void)
        error = "orte_snapc_base_open";
        goto error;
    }
-    if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_process_info.hnp, !orte_process_info.daemon))) {
+    if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_proc_info.hnp, !orte_proc_info.daemon))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_snapc_base_select";
        goto error;
@@ -79,14 +79,14 @@ static int rte_init(char flags)
     /* if I am a daemon, complete my setup using the
      * default procedure
      */
-    if (orte_process_info.daemon) {
+    if (orte_proc_info.daemon) {
         if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) {
             ORTE_ERROR_LOG(ret);
             error = "orte_ess_base_orted_setup";
             goto error;
         }
 
-    } else if (orte_process_info.tool) {
+    } else if (orte_proc_info.tool) {
         /* otherwise, if I am a tool proc, use that procedure */
         if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
             ORTE_ERROR_LOG(ret);
@@ -109,7 +109,7 @@ static int rte_init(char flags)
     opal_pointer_array_init(&nidmap, 8, INT32_MAX, 8);
 
     /* if one was provided, build my nidmap */
-    if (ORTE_SUCCESS != (ret = orte_ess_base_build_nidmap(orte_process_info.sync_buf,
+    if (ORTE_SUCCESS != (ret = orte_ess_base_build_nidmap(orte_proc_info.sync_buf,
                                                           &nidmap, &pmap, &nprocs))) {
         ORTE_ERROR_LOG(ret);
         error = "orte_ess_base_build_nidmap";
@@ -134,11 +134,11 @@ static int rte_finalize(void)
     int32_t i;
 
     /* if I am a daemon, finalize using the default procedure */
-    if (orte_process_info.daemon) {
+    if (orte_proc_info.daemon) {
         if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
             ORTE_ERROR_LOG(ret);
         }
-    } else if (orte_process_info.tool) {
+    } else if (orte_proc_info.tool) {
         /* otherwise, if I am a tool proc, use that procedure */
         if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) {
             ORTE_ERROR_LOG(ret);
@@ -371,10 +371,10 @@ static int bproc_set_name(void)
     ORTE_PROC_MY_NAME->vpid = vpid_start + (bproc_rank * stride);
 
 
-    if(NULL != orte_process_info.nodename) {
-        free(orte_process_info.nodename);
+    if(NULL != orte_proc_info.nodename) {
+        free(orte_proc_info.nodename);
     }
-    asprintf(&orte_process_info.nodename, "%d", bproc_currnode());
+    asprintf(&orte_proc_info.nodename, "%d", bproc_currnode());
 
     return ORTE_SUCCESS;
 }
@@ -85,7 +85,7 @@ static int rte_init(char flags)
     ORTE_PROC_MY_NAME->vpid = (orte_vpid_t) cnos_get_rank();
 
     /* Get the number of procs in the job from cnos */
-    orte_process_info.num_procs = (orte_std_cntr_t) cnos_get_size();
+    orte_proc_info.num_procs = (orte_std_cntr_t) cnos_get_size();
 
     /* Get the nid map */
     nprocs = cnos_get_nidpid_map(&map);
@@ -146,7 +146,7 @@ static char* proc_get_hostname(orte_process_name_t *proc)
 static uint32_t proc_get_arch(orte_process_name_t *proc)
 {
     /* always homogeneous, so other side is always same as us */
-    return orte_process_info.arch;
+    return orte_proc_info.arch;
 }
 
 static int update_arch(orte_process_name_t *proc, uint32_t arch)
orte/mca/ess/env/ess_env_component.c (vendored; 2 changed lines)
@@ -81,7 +81,7 @@ int orte_ess_env_component_query(mca_base_module_t **module, int *priority)
     * it would be impossible for the correct env vars
     * to have been set!
     */
-    if (NULL != orte_process_info.my_hnp_uri) {
+    if (NULL != orte_proc_info.my_hnp_uri) {
        *priority = 20;
        *module = (mca_base_module_t *)&orte_ess_env_module;
        return ORTE_SUCCESS;
orte/mca/ess/env/ess_env_module.c (vendored; 24 changed lines)
@@ -136,14 +136,14 @@ static int rte_init(char flags)
     /* if I am a daemon, complete my setup using the
      * default procedure
      */
-    if (orte_process_info.daemon) {
+    if (orte_proc_info.daemon) {
         if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) {
             ORTE_ERROR_LOG(ret);
             error = "orte_ess_base_orted_setup";
             goto error;
         }
 
-    } else if (orte_process_info.tool) {
+    } else if (orte_proc_info.tool) {
         /* otherwise, if I am a tool proc, use that procedure */
         if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
             ORTE_ERROR_LOG(ret);
@@ -165,7 +165,7 @@ static int rte_init(char flags)
     }
 
     /* if one was provided, build my nidmap */
-    if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_process_info.sync_buf))) {
+    if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_proc_info.sync_buf))) {
         ORTE_ERROR_LOG(ret);
         error = "orte_util_nidmap_init";
         goto error;
@@ -186,11 +186,11 @@ static int rte_finalize(void)
     int ret;
 
     /* if I am a daemon, finalize using the default procedure */
-    if (orte_process_info.daemon) {
+    if (orte_proc_info.daemon) {
         if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
             ORTE_ERROR_LOG(ret);
         }
-    } else if (orte_process_info.tool) {
+    } else if (orte_proc_info.tool) {
         /* otherwise, if I am a tool proc, use that procedure */
         if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) {
             ORTE_ERROR_LOG(ret);
@@ -505,12 +505,12 @@ static int rte_ft_event(int state)
     * Restart the routed framework
     * JJH: Lie to the finalize function so it does not try to contact the daemon.
     */
-    orte_process_info.tool = true;
+    orte_proc_info.tool = true;
    if (ORTE_SUCCESS != (ret = orte_routed.finalize()) ) {
        exit_status = ret;
        goto cleanup;
    }
-    orte_process_info.tool = false;
+    orte_proc_info.tool = false;
    if (ORTE_SUCCESS != (ret = orte_routed.initialize()) ) {
        exit_status = ret;
        goto cleanup;
@@ -556,14 +556,14 @@ static int rte_ft_event(int state)
     * Session directory re-init
     */
    if (ORTE_SUCCESS != (ret = orte_session_dir(true,
-                                                orte_process_info.tmpdir_base,
-                                                orte_process_info.nodename,
+                                                orte_proc_info.tmpdir_base,
+                                                orte_proc_info.nodename,
                                                NULL, /* Batch ID -- Not used */
                                                ORTE_PROC_MY_NAME))) {
        exit_status = ret;
    }
 
-    opal_output_set_output_file_info(orte_process_info.proc_session_dir,
+    opal_output_set_output_file_info(orte_proc_info.proc_session_dir,
                                     "output-", NULL, NULL);
 
    /*
@@ -590,13 +590,13 @@ static int rte_ft_event(int state)
     * - Note: BLCR does this because it tries to preseve the PID
     *         of the program across checkpointes
     */
-    if( ORTE_SUCCESS != (ret = ess_env_ft_event_update_process_info(orte_process_info.my_name, getpid())) ) {
+    if( ORTE_SUCCESS != (ret = ess_env_ft_event_update_process_info(orte_proc_info.my_name, getpid())) ) {
        exit_status = ret;
        goto cleanup;
    }
 
    /* if one was provided, build my nidmap */
-    if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_process_info.sync_buf))) {
+    if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_proc_info.sync_buf))) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
@ -73,7 +73,7 @@ int orte_ess_hnp_component_query(mca_base_module_t **module, int *priority)
|
||||
/* we are the hnp module - we need to be selected
|
||||
* IFF we are designated as the hnp
|
||||
*/
|
||||
if (orte_process_info.hnp) {
|
||||
if (orte_proc_info.hnp) {
|
||||
*priority = 100;
|
||||
*module = (mca_base_module_t *)&orte_ess_hnp_module;
|
||||
return ORTE_SUCCESS;

@ -287,12 +287,12 @@ static int rte_init(char flags)
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
"%s setting up session dir with\n\ttmpdir: %s\n\thost %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base,
orte_process_info.nodename));
(NULL == orte_proc_info.tmpdir_base) ? "UNDEF" : orte_proc_info.tmpdir_base,
orte_proc_info.nodename));

if (ORTE_SUCCESS != (ret = orte_session_dir(true,
orte_process_info.tmpdir_base,
orte_process_info.nodename, NULL,
orte_proc_info.tmpdir_base,
orte_proc_info.nodename, NULL,
ORTE_PROC_MY_NAME))) {
ORTE_ERROR_LOG(ret);
error = "orte_session_dir";

@ -302,11 +302,11 @@ static int rte_init(char flags)
/* Once the session directory location has been established, set
the opal_output hnp file location to be in the
proc-specific session directory. */
opal_output_set_output_file_info(orte_process_info.proc_session_dir,
opal_output_set_output_file_info(orte_proc_info.proc_session_dir,
"output-", NULL, NULL);

/* save my contact info in a file for others to find */
jobfam_dir = opal_dirname(orte_process_info.job_session_dir);
jobfam_dir = opal_dirname(orte_proc_info.job_session_dir);
contact_path = opal_os_path(false, jobfam_dir, "contact.txt", NULL);
free(jobfam_dir);

@ -356,15 +356,15 @@ static int rte_init(char flags)

/* create and store a node object where we are */
node = OBJ_NEW(orte_node_t);
node->name = strdup(orte_process_info.nodename);
node->arch = orte_process_info.arch;
node->name = strdup(orte_proc_info.nodename);
node->arch = orte_proc_info.arch;
node->index = opal_pointer_array_add(orte_node_pool, node);

/* create and store a proc object for us */
proc = OBJ_NEW(orte_proc_t);
proc->name.jobid = ORTE_PROC_MY_NAME->jobid;
proc->name.vpid = ORTE_PROC_MY_NAME->vpid;
proc->pid = orte_process_info.pid;
proc->pid = orte_proc_info.pid;
proc->rml_uri = orte_rml.get_contact_info();
proc->state = ORTE_PROC_STATE_RUNNING;
OBJ_RETAIN(node); /* keep accounting straight */

@ -431,7 +431,7 @@ static int rte_init(char flags)
goto error;
}

if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_process_info.hnp, !orte_process_info.daemon))) {
if (ORTE_SUCCESS != (ret = orte_snapc_base_select(orte_proc_info.hnp, !orte_proc_info.daemon))) {
ORTE_ERROR_LOG(ret);
error = "orte_snapc_base_select";
goto error;

@ -489,7 +489,7 @@ static int rte_finalize(void)
int i;

/* remove my contact info file */
contact_path = opal_os_path(false, orte_process_info.top_session_dir,
contact_path = opal_os_path(false, orte_proc_info.top_session_dir,
"contact.txt", NULL);
unlink(contact_path);
free(contact_path);
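
Illustrative sketch (not part of the commit): the rte_finalize() hunk above builds the contact-file path with opal_os_path() and removes it with unlink(). A minimal stand-alone version of the same flow, with asprintf() standing in for opal_os_path() and a placeholder session directory:

    #define _GNU_SOURCE            /* for asprintf on glibc */
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    int main(void)
    {
        const char *top_session_dir = "/tmp/openmpi-sessions";   /* placeholder */
        char *contact_path = NULL;

        if (asprintf(&contact_path, "%s/%s", top_session_dir, "contact.txt") < 0) {
            return 1;
        }
        unlink(contact_path);   /* drop the advertised contact info */
        free(contact_path);
        return 0;
    }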

@ -72,7 +72,7 @@ int orte_ess_lsf_component_query(mca_base_module_t **module, int *priority)
*/

if (NULL != getenv("LSB_JOBID") &&
NULL != orte_process_info.my_hnp_uri) {
NULL != orte_proc_info.my_hnp_uri) {
*priority = 40;
*module = (mca_base_module_t *)&orte_ess_lsf_module;
return ORTE_SUCCESS;

@ -96,13 +96,13 @@ static int rte_init(char flags)
/* if I am a daemon, complete my setup using the
* default procedure
*/
if (orte_process_info.daemon) {
if (orte_proc_info.daemon) {
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) {
ORTE_ERROR_LOG(ret);
error = "orte_ess_base_orted_setup";
goto error;
}
} else if (orte_process_info.tool) {
} else if (orte_proc_info.tool) {
/* otherwise, if I am a tool proc, use that procedure */
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
ORTE_ERROR_LOG(ret);

@ -124,7 +124,7 @@ static int rte_init(char flags)
}

/* setup the nidmap arrays */
if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_process_info.sync_buf))) {
if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_proc_info.sync_buf))) {
ORTE_ERROR_LOG(ret);
error = "orte_util_nidmap_init";
goto error;

@ -145,11 +145,11 @@ static int rte_finalize(void)
int ret;

/* if I am a daemon, finalize using the default procedure */
if (orte_process_info.daemon) {
if (orte_proc_info.daemon) {
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
ORTE_ERROR_LOG(ret);
}
} else if (orte_process_info.tool) {
} else if (orte_proc_info.tool) {
/* otherwise, if I am a tool proc, use that procedure */
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) {
ORTE_ERROR_LOG(ret);

@ -101,7 +101,7 @@ static int rte_init(char flags)
*/
/* split the nidmap string */
nidmap = opal_argv_split(nidmap_string, ':');
orte_process_info.num_procs = (orte_std_cntr_t) opal_argv_count(nidmap);
orte_proc_info.num_procs = (orte_std_cntr_t) opal_argv_count(nidmap);

/* MPI_Init needs the grpcomm framework, so we have to init it */
if (ORTE_SUCCESS != (rc = orte_grpcomm_base_open())) {

@ -156,7 +156,7 @@ static char* proc_get_hostname(orte_process_name_t *proc)

static uint32_t proc_get_arch(orte_process_name_t *proc)
{
return orte_process_info.arch;
return orte_proc_info.arch;
}

static int update_arch(orte_process_name_t *proc, uint32_t arch)

@ -73,9 +73,9 @@ int orte_ess_singleton_component_query(mca_base_module_t **module, int *priority
/* if we are an HNP, daemon, or tool, then we
* are definitely not a singleton!
*/
if (orte_process_info.hnp ||
orte_process_info.daemon ||
orte_process_info.tool) {
if (orte_proc_info.hnp ||
orte_proc_info.daemon ||
orte_proc_info.tool) {
*module = NULL;
return ORTE_ERROR;
}

@ -85,7 +85,7 @@ int orte_ess_singleton_component_query(mca_base_module_t **module, int *priority
* given an HNP URI, then we are definitely
* not a singleton
*/
if (NULL != orte_process_info.my_hnp_uri) {
if (NULL != orte_proc_info.my_hnp_uri) {
*module = NULL;
return ORTE_ERROR;
}

@ -144,7 +144,7 @@ static int rte_init(char flags)
return rc;
}

orte_process_info.num_procs = 1;
orte_proc_info.num_procs = 1;

/* NOTE: do not wireup our io - let the fork'd orted serve
* as our io handler. This prevents issues with the event

@ -275,8 +275,8 @@ static int fork_hnp(void)
}

/* Fork off the child */
orte_process_info.hnp_pid = fork();
if(orte_process_info.hnp_pid < 0) {
orte_proc_info.hnp_pid = fork();
if(orte_proc_info.hnp_pid < 0) {
ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_CHILDREN);
close(p[0]);
close(p[1]);

@ -286,7 +286,7 @@ static int fork_hnp(void)
return ORTE_ERR_SYS_LIMITS_CHILDREN;
}

if (orte_process_info.hnp_pid == 0) {
if (orte_proc_info.hnp_pid == 0) {
close(p[0]);
close(death_pipe[1]);
/* I am the child - exec me */

@ -368,13 +368,13 @@ static int fork_hnp(void)
return rc;
}
/* save the daemon uri - we will process it later */
orte_process_info.my_daemon_uri = strdup(orted_uri);
orte_proc_info.my_daemon_uri = strdup(orted_uri);

/* likewise, since this is also the HNP, set that uri too */
orte_process_info.my_hnp_uri = strdup(orted_uri);
orte_proc_info.my_hnp_uri = strdup(orted_uri);

/* indicate we are a singleton so orte_init knows what to do */
orte_process_info.singleton = true;
orte_proc_info.singleton = true;
/* all done - report success */
free(orted_uri);
return ORTE_SUCCESS;
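
Illustrative sketch (not part of the commit): the fork_hnp() hunks record the forked daemon's pid in orte_proc_info.hnp_pid and later store the orted's reported URI as both my_daemon_uri and my_hnp_uri, because a singleton's orted is also its HNP. A self-contained model of that control flow; the exec'd command and URI value are placeholders, and the real code reads the URI back over a pipe:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/types.h>
    #include <unistd.h>

    int main(void)
    {
        pid_t hnp_pid = fork();
        if (hnp_pid < 0) {
            perror("fork");                 /* maps to ORTE_ERR_SYS_LIMITS_CHILDREN */
            return 1;
        }
        if (0 == hnp_pid) {
            /* child: the real code execs the orted here */
            execlp("true", "true", (char *)NULL);
            _exit(127);
        }
        /* parent: pretend the child reported its contact URI */
        char *orted_uri = strdup("placeholder-uri");
        char *my_daemon_uri = strdup(orted_uri);
        char *my_hnp_uri = strdup(orted_uri);   /* same process is also the HNP */
        printf("hnp_pid=%ld daemon=%s hnp=%s\n", (long)hnp_pid, my_daemon_uri, my_hnp_uri);
        free(orted_uri); free(my_daemon_uri); free(my_hnp_uri);
        return 0;
    }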

@ -209,7 +209,7 @@ static char* proc_get_hostname(orte_process_name_t *proc)
/* if it is me, the answer is my nodename */
if (proc->jobid == ORTE_PROC_MY_NAME->jobid &&
proc->vpid == ORTE_PROC_MY_NAME->vpid) {
return orte_process_info.nodename;
return orte_proc_info.nodename;
}

/* otherwise, no idea */

@ -221,7 +221,7 @@ static uint32_t proc_get_arch(orte_process_name_t *proc)
/* if it is me, the answer is my arch */
if (proc->jobid == ORTE_PROC_MY_NAME->jobid &&
proc->vpid == ORTE_PROC_MY_NAME->vpid) {
return orte_process_info.arch;
return orte_proc_info.arch;
}

/* otherwise, no idea */

@ -401,12 +401,12 @@ static int rte_ft_event(int state)
* Restart the routed framework
* JJH: Lie to the finalize function so it does not try to contact the daemon.
*/
orte_process_info.tool = true;
orte_proc_info.tool = true;
if (ORTE_SUCCESS != (ret = orte_routed.finalize()) ) {
exit_status = ret;
goto cleanup;
}
orte_process_info.tool = false;
orte_proc_info.tool = false;
if (ORTE_SUCCESS != (ret = orte_routed.initialize()) ) {
exit_status = ret;
goto cleanup;

@ -452,14 +452,14 @@ static int rte_ft_event(int state)
* Session directory re-init
*/
if (ORTE_SUCCESS != (ret = orte_session_dir(true,
orte_process_info.tmpdir_base,
orte_process_info.nodename,
orte_proc_info.tmpdir_base,
orte_proc_info.nodename,
NULL, /* Batch ID -- Not used */
ORTE_PROC_MY_NAME))) {
exit_status = ret;
}

opal_output_set_output_file_info(orte_process_info.proc_session_dir,
opal_output_set_output_file_info(orte_proc_info.proc_session_dir,
"output-", NULL, NULL);

/*

@ -486,13 +486,13 @@ static int rte_ft_event(int state)
* - Note: BLCR does this because it tries to preserve the PID
* of the program across checkpoints
*/
if( ORTE_SUCCESS != (ret = ess_slave_ft_event_update_process_info(orte_process_info.my_name, getpid())) ) {
if( ORTE_SUCCESS != (ret = ess_slave_ft_event_update_process_info(orte_proc_info.my_name, getpid())) ) {
exit_status = ret;
goto cleanup;
}

/* if one was provided, build my nidmap */
if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_process_info.sync_buf))) {
if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_proc_info.sync_buf))) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;

@ -76,7 +76,7 @@ int orte_ess_slurm_component_query(mca_base_module_t **module, int *priority)
*/

if (NULL != getenv("SLURM_JOBID") &&
NULL != orte_process_info.my_hnp_uri) {
NULL != orte_proc_info.my_hnp_uri) {
*priority = 30;
*module = (mca_base_module_t *)&orte_ess_slurm_module;
return ORTE_SUCCESS;

@ -108,7 +108,7 @@ static int rte_init(char flags)
/* if I am a daemon, complete my setup using the
* default procedure
*/
if (orte_process_info.daemon) {
if (orte_proc_info.daemon) {
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) {
ORTE_ERROR_LOG(ret);
error = "orte_ess_base_orted_setup";

@ -140,7 +140,7 @@ static int rte_init(char flags)
}
return ORTE_SUCCESS;
}
} else if (orte_process_info.tool) {
} else if (orte_proc_info.tool) {
/* otherwise, if I am a tool proc, use that procedure */
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
ORTE_ERROR_LOG(ret);

@ -162,7 +162,7 @@ static int rte_init(char flags)
}

/* setup the nidmap arrays */
if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_process_info.sync_buf))) {
if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_proc_info.sync_buf))) {
ORTE_ERROR_LOG(ret);
error = "orte_util_nidmap_init";
goto error;

@ -183,11 +183,11 @@ static int rte_finalize(void)
int ret;

/* if I am a daemon, finalize using the default procedure */
if (orte_process_info.daemon) {
if (orte_proc_info.daemon) {
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
ORTE_ERROR_LOG(ret);
}
} else if (orte_process_info.tool) {
} else if (orte_proc_info.tool) {
/* otherwise, if I am a tool proc, use that procedure */
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) {
ORTE_ERROR_LOG(ret);

@ -420,15 +420,15 @@ static int slurm_set_name(void)
"ess:slurm set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

/* fix up the system info nodename to match exactly what slurm returned */
if (NULL != orte_process_info.nodename) {
free(orte_process_info.nodename);
if (NULL != orte_proc_info.nodename) {
free(orte_proc_info.nodename);
}
orte_process_info.nodename = get_slurm_nodename(slurm_nodeid);
orte_proc_info.nodename = get_slurm_nodename(slurm_nodeid);

OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
"ess:slurm set nodename to %s",
orte_process_info.nodename));
orte_proc_info.nodename));

/* get the non-name common environmental variables */
if (ORTE_SUCCESS != (rc = orte_ess_env_get())) {

@ -554,7 +554,7 @@ static int build_daemon_nidmap(void)
/* construct the URI */
proc.vpid = node->daemon;
orte_util_convert_process_name_to_string(&proc_name, &proc);
asprintf(&uri, "%s;tcp://%s:%d", proc_name, addr, (int)orte_process_info.my_port);
asprintf(&uri, "%s;tcp://%s:%d", proc_name, addr, (int)orte_proc_info.my_port);
opal_output(0, "contact info %s", uri);
opal_dss.pack(&buf, &uri, 1, OPAL_STRING);
free(proc_name);

@ -74,10 +74,10 @@ int orte_ess_slurmd_component_query(mca_base_module_t **module, int *priority)
* by mpirun but are in a slurm world
*/

if (orte_process_info.mpi_proc &&
if (orte_proc_info.mpi_proc &&
NULL != getenv("SLURM_JOBID") &&
NULL != getenv("SLURM_STEPID") &&
NULL == orte_process_info.my_hnp_uri) {
NULL == orte_proc_info.my_hnp_uri) {
*priority = 30;
*module = (mca_base_module_t *)&orte_ess_slurmd_module;
return ORTE_SUCCESS;

@ -168,7 +168,7 @@ static int rte_init(char flags)
error = "could not get SLURM_STEP_NUM_TASKS";
goto error;
}
orte_process_info.num_procs = strtol(envar, NULL, 10);
orte_proc_info.num_procs = strtol(envar, NULL, 10);

/* get my local nodeid */
if (NULL == (envar = getenv("SLURM_NODEID"))) {

@ -207,7 +207,7 @@ static int rte_init(char flags)
goto error;
}
num_nodes = opal_argv_count(nodes);
orte_process_info.num_nodes = num_nodes;
orte_proc_info.num_nodes = num_nodes;

/* compute the ppn */
if (ORTE_SUCCESS != (ret = orte_regex_extract_ppn(num_nodes, tasks_per_node, &ppn))) {

@ -245,7 +245,7 @@ static int rte_init(char flags)
}

/* set the size of the nidmap storage so we minimize realloc's */
if (ORTE_SUCCESS != (ret = opal_pointer_array_set_size(&orte_nidmap, orte_process_info.num_nodes))) {
if (ORTE_SUCCESS != (ret = opal_pointer_array_set_size(&orte_nidmap, orte_proc_info.num_nodes))) {
error = "could not set pointer array size for nidmap";
goto error;
}

@ -264,7 +264,7 @@ static int rte_init(char flags)
jmap->job = ORTE_PROC_MY_NAME->jobid;
opal_pointer_array_add(&orte_jobmap, jmap);
/* update the num procs */
jmap->num_procs = orte_process_info.num_procs;
jmap->num_procs = orte_proc_info.num_procs;
/* set the size of the pidmap storage so we minimize realloc's */
if (ORTE_SUCCESS != (ret = opal_pointer_array_set_size(&jmap->pmap, jmap->num_procs))) {
ORTE_ERROR_LOG(ret);

@ -301,8 +301,8 @@ static int rte_init(char flags)
} else if (cyclic) {
/* cycle across the nodes */
vpid = 0;
while (vpid < orte_process_info.num_procs) {
for (i=0; i < num_nodes && vpid < orte_process_info.num_procs; i++) {
while (vpid < orte_proc_info.num_procs) {
for (i=0; i < num_nodes && vpid < orte_proc_info.num_procs; i++) {
if (0 < ppn[i]) {
node = (orte_nid_t*)orte_nidmap.addr[i];
pmap = OBJ_NEW(orte_pmap_t);
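
Illustrative sketch (not part of the commit): the cyclic branch above deals vpids across the nodes round-robin, skipping any node whose procs-per-node allotment is used up. A stand-alone model with made-up counts; the ppn values sum to num_procs so the outer loop terminates, which the real mapping guarantees by construction:

    #include <stdio.h>

    int main(void)
    {
        int num_nodes = 3;
        int ppn[] = { 2, 2, 1 };        /* remaining slots per node (made up) */
        int num_procs = 5, vpid = 0;

        while (vpid < num_procs) {
            for (int i = 0; i < num_nodes && vpid < num_procs; i++) {
                if (0 < ppn[i]) {
                    printf("vpid %d -> node %d\n", vpid, i);
                    ppn[i]--;
                    vpid++;
                }
            }
        }
        return 0;
    }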

@ -74,7 +74,7 @@ int orte_ess_tool_component_query(mca_base_module_t **module, int *priority)
* precedence. This would happen, for example,
* if the tool is a distributed set of processes
*/
if (orte_process_info.tool) {
if (orte_proc_info.tool) {
*priority = 10;
*module = (mca_base_module_t *)&orte_ess_tool_module;
return ORTE_SUCCESS;

@ -225,7 +225,7 @@ int orte_filem_base_get_proc_node_name(orte_process_name_t *proc, char **machine
/* set default answer */
*machine_name = NULL;

if (orte_process_info.hnp) {
if (orte_proc_info.hnp) {
/* if I am the HNP, then all the data structures are local to me - no
* need to send messages around to get the info
*/

@ -68,7 +68,7 @@ int orte_filem_base_comm_start(void)
int rc;

/* Only active in HNP and daemons */
if( !orte_process_info.hnp && !orte_process_info.daemon ) {
if( !orte_proc_info.hnp && !orte_proc_info.daemon ) {
return ORTE_SUCCESS;
}
if ( recv_issued ) {

@ -98,7 +98,7 @@ int orte_filem_base_comm_stop(void)
int rc;

/* Only active in HNP and daemons */
if( !orte_process_info.hnp && !orte_process_info.daemon ) {
if( !orte_proc_info.hnp && !orte_proc_info.daemon ) {
return ORTE_SUCCESS;
}
if ( recv_issued ) {

@ -622,7 +622,7 @@ static int orte_filem_rsh_start_copy(orte_filem_base_request_t *request) {
f_set->remote_target));
orte_show_help("help-orte-filem-rsh.txt",
"orte-filem-rsh:get-file-not-exist",
true, f_set->local_target, orte_process_info.nodename);
true, f_set->local_target, orte_proc_info.nodename);
request->is_done[cur_index] = true;
request->is_active[cur_index] = true;
request->exit_status[cur_index] = -1;

@ -645,7 +645,7 @@ static int orte_filem_rsh_start_copy(orte_filem_base_request_t *request) {
f_set->local_target));
orte_show_help("help-orte-filem-rsh.txt",
"orte-filem-rsh:get-file-exists",
true, f_set->local_target, orte_process_info.nodename);
true, f_set->local_target, orte_proc_info.nodename);
request->is_done[cur_index] = true;
request->is_active[cur_index] = true;
request->exit_status[cur_index] = -1;

@ -88,7 +88,7 @@ static int init(void)
/* if we are a daemon or the hnp, we need to post a
* recv to catch any collective operations
*/
if (orte_process_info.daemon || orte_process_info.hnp) {
if (orte_proc_info.daemon || orte_proc_info.hnp) {
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
ORTE_RML_TAG_DAEMON_COLLECTIVE,
ORTE_RML_NON_PERSISTENT,

@ -111,7 +111,7 @@ static void finalize(void)
/* if we are a daemon or the hnp, we need to cancel the
* recv we posted
*/
if (orte_process_info.daemon || orte_process_info.hnp) {
if (orte_proc_info.daemon || orte_proc_info.hnp) {
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON_COLLECTIVE);
}
}

@ -203,7 +203,7 @@ static int xcast(orte_jobid_t job,
* fire right away, but that's okay
* The macro makes a copy of the buffer, so it's okay to release it here
*/
if (orte_process_info.hnp) {
if (orte_proc_info.hnp) {
ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &buf, ORTE_RML_TAG_DAEMON, orte_daemon_cmd_processor);
} else {
/* otherwise, send it to the HNP for relay */
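
Illustrative sketch (not part of the commit): this xcast decision recurs across the grpcomm modules in this patch - the HNP would rather not send to itself, so it queues the buffer for local processing, while every other process relays through the HNP. A self-contained model; the two helpers are stubs standing in for ORTE_MESSAGE_EVENT and the RML send:

    #include <stdbool.h>
    #include <stdio.h>

    static void process_locally(const char *buf) { printf("local: %s\n", buf); }
    static void send_to_hnp(const char *buf)     { printf("relay: %s\n", buf); }

    static void xcast_start(bool i_am_hnp, const char *buf)
    {
        if (i_am_hnp) {
            process_locally(buf);   /* avoid sending a message to ourselves */
        } else {
            send_to_hnp(buf);       /* the HNP relays it onward */
        }
    }

    int main(void)
    {
        xcast_start(true, "launch-msg");
        xcast_start(false, "launch-msg");
        return 0;
    }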

@ -542,7 +542,7 @@ static int daemon_collective(orte_process_name_t *sender, opal_buffer_t *data)

if (jobdat->num_collected == jobdat->num_participating) {
/* if I am the HNP, go process the results */
if (orte_process_info.hnp) {
if (orte_proc_info.hnp) {
goto hnp_process;
}

@ -79,7 +79,7 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
}

/* pack our hostname */
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &orte_process_info.nodename, 1, OPAL_STRING))) {
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &orte_proc_info.nodename, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}

@ -91,7 +91,7 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
}

/* pack our arch */
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &orte_process_info.arch, 1, OPAL_UINT32))) {
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &orte_proc_info.arch, 1, OPAL_UINT32))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}

@ -350,7 +350,7 @@ int orte_grpcomm_base_peer_modex(bool modex_db)
goto cleanup;
}

if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &orte_process_info.arch, 1, OPAL_UINT32))) {
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &orte_proc_info.arch, 1, OPAL_UINT32))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}

@ -104,17 +104,17 @@ static int init(void)
ORTE_ERROR_LOG(rc);
}

if (opal_profile && orte_process_info.mpi_proc) {
if (opal_profile && orte_proc_info.mpi_proc) {
/* if I am an MPI application proc, then create a buffer
* to pack all my attributes in */
profile_buf = OBJ_NEW(opal_buffer_t);
/* seed it with the node name */
if (ORTE_SUCCESS != (rc = opal_dss.pack(profile_buf, &orte_process_info.nodename, 1, OPAL_STRING))) {
if (ORTE_SUCCESS != (rc = opal_dss.pack(profile_buf, &orte_proc_info.nodename, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
}
}

if (orte_process_info.hnp && recv_on) {
if (orte_proc_info.hnp && recv_on) {
/* open the profile file for writing */
if (NULL == opal_profile_file) {
/* no file specified - we will just ignore any incoming data */

@ -140,7 +140,7 @@ static int init(void)
/* if we are a daemon or the hnp, we need to post a
* recv to catch any collective operations
*/
if (orte_process_info.daemon || orte_process_info.hnp) {
if (orte_proc_info.daemon || orte_proc_info.hnp) {
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
ORTE_RML_TAG_DAEMON_COLLECTIVE,
ORTE_RML_NON_PERSISTENT,

@ -163,7 +163,7 @@ static void finalize(void)

orte_grpcomm_base_modex_finalize();

if (opal_profile && orte_process_info.mpi_proc) {
if (opal_profile && orte_proc_info.mpi_proc) {
/* if I am an MPI proc, send my buffer to the collector */
boptr = &bo;
opal_dss.unload(profile_buf, (void**)&boptr->bytes, &boptr->size);

@ -177,7 +177,7 @@ static void finalize(void)
OBJ_DESTRUCT(&profile);
}

if (orte_process_info.hnp && recv_on) {
if (orte_proc_info.hnp && recv_on) {
/* if we are profiling and I am the HNP, then stop the
* profiling receive
*/

@ -191,7 +191,7 @@ static void finalize(void)
/* if we are a daemon or the hnp, we need to cancel the
* recv we posted
*/
if (orte_process_info.daemon || orte_process_info.hnp) {
if (orte_proc_info.daemon || orte_proc_info.hnp) {
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON_COLLECTIVE);
}
}

@ -283,7 +283,7 @@ static int xcast(orte_jobid_t job,
* fire right away, but that's okay
* The macro makes a copy of the buffer, so it's okay to release it here
*/
if (orte_process_info.hnp) {
if (orte_proc_info.hnp) {
ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &buf, ORTE_RML_TAG_DAEMON, orte_daemon_cmd_processor);
} else {
/* otherwise, send it to the HNP for relay */

@ -930,7 +930,7 @@ static int daemon_collective(orte_process_name_t *sender, opal_buffer_t *data)

if (jobdat->num_collected == jobdat->num_participating) {
/* if I am the HNP, go process the results */
if (orte_process_info.hnp) {
if (orte_proc_info.hnp) {
goto hnp_process;
}

@ -209,7 +209,7 @@ static int xcast(orte_jobid_t job,
* fire right away, but that's okay
* The macro makes a copy of the buffer, so it's okay to release it here
*/
if (orte_process_info.hnp) {
if (orte_proc_info.hnp) {
ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &buf, ORTE_RML_TAG_DAEMON, orte_daemon_cmd_processor);
} else {
/* otherwise, send it to the HNP for relay */

@ -317,13 +317,13 @@ static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf)
*/
if (0 == my_local_rank) {
/* we need one entry/node in this job */
my_coll_peers = (orte_vpid_t*)malloc(orte_process_info.num_nodes * sizeof(orte_vpid_t));
my_coll_peers = (orte_vpid_t*)malloc(orte_proc_info.num_nodes * sizeof(orte_vpid_t));
cpeers = 0;
}

/* cycle through the procs to create a list of those that are local to me */
proc.jobid = ORTE_PROC_MY_NAME->jobid;
for (v=0; v < orte_process_info.num_procs; v++) {
for (v=0; v < orte_proc_info.num_procs; v++) {
proc.vpid = v;
/* is this proc local_rank=0 on its node? */
if (0 == my_local_rank && 0 == orte_ess.get_local_rank(&proc)) {

@ -47,7 +47,7 @@ int orte_iof_base_close(void)
OBJ_DESTRUCT(&orte_iof_base.iof_components_opened);

OPAL_THREAD_LOCK(&orte_iof_base.iof_write_output_lock);
if (!orte_process_info.daemon) {
if (!orte_proc_info.daemon) {
/* check if anything is still trying to be written out */
wev = orte_iof_base.iof_write_stdout->wev;
if (!opal_list_is_empty(&wev->outputs)) {

@ -192,7 +192,7 @@ int orte_iof_base_open(void)
}

/* daemons do not need to do this as they do not write out stdout/err */
if (!orte_process_info.daemon) {
if (!orte_proc_info.daemon) {
/* setup the stdout event */
ORTE_IOF_SINK_DEFINE(&orte_iof_base.iof_write_stdout, ORTE_PROC_MY_NAME,
1, ORTE_IOF_STDOUT, orte_iof_base_write_handler, NULL);

@ -131,7 +131,7 @@ static int orte_iof_hnp_query(mca_base_module_t **module, int *priority)
*priority = -1;

/* if we are not the HNP, then don't use this module */
if (!orte_process_info.hnp) {
if (!orte_proc_info.hnp) {
return ORTE_ERROR;
}

@ -115,7 +115,7 @@ static int orte_iof_orted_query(mca_base_module_t **module, int *priority)
*priority = -1;

/* if we are not a daemon, then don't use this module */
if (!orte_process_info.daemon) {
if (!orte_proc_info.daemon) {
return ORTE_ERROR;
}

@ -106,7 +106,7 @@ static int orte_iof_tool_query(mca_base_module_t **module, int *priority)
*priority = -1;

/* if we are not a tool, then don't use this module */
if (!orte_process_info.tool) {
if (!orte_proc_info.tool) {
return ORTE_ERROR;
}

@ -119,7 +119,7 @@ static void mypeerlog(int severity, int errcode, orte_process_name_t *peer_proc,
peer_name ? peer_name : "UNKNOWN",
peer_host ? peer_host : "UNKNOWN",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
orte_process_info.nodename);
orte_proc_info.nodename);
space -= len;
pos += len;

@ -662,7 +662,7 @@ REPORT_ERROR:
/* if we are the HNP, then we would rather not send this to ourselves -
* instead, we queue it up for local processing
*/
if (orte_process_info.hnp) {
if (orte_proc_info.hnp) {
ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &alert,
ORTE_RML_TAG_APP_LAUNCH_CALLBACK,
orte_plm_base_app_report_launch);

@ -759,14 +759,14 @@ static int odls_base_default_setup_fork(orte_app_context_t *context,

/* pass my contact info to the local proc so we can talk */
param = mca_base_param_environ_variable("orte","local_daemon","uri");
opal_setenv(param, orte_process_info.my_daemon_uri, true, environ_copy);
opal_setenv(param, orte_proc_info.my_daemon_uri, true, environ_copy);
free(param);

/* pass the hnp's contact info to the local proc in case it
* needs it
*/
param = mca_base_param_environ_variable("orte","hnp","uri");
opal_setenv(param, orte_process_info.my_hnp_uri, true, environ_copy);
opal_setenv(param, orte_proc_info.my_hnp_uri, true, environ_copy);
free(param);

/* setup yield schedule - do not override any user-supplied directive! */

@ -1419,7 +1419,7 @@ CLEANUP:
/* if we are the HNP, then we would rather not send this to ourselves -
* instead, we queue it up for local processing
*/
if (orte_process_info.hnp) {
if (orte_proc_info.hnp) {
ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &alert,
ORTE_RML_TAG_APP_LAUNCH_CALLBACK,
orte_plm_base_app_report_launch);

@ -1817,7 +1817,7 @@ int orte_odls_base_default_require_sync(orte_process_name_t *proc,
/* if we are the HNP, then we would rather not send this to ourselves -
* instead, we queue it up for local processing
*/
if (orte_process_info.hnp) {
if (orte_proc_info.hnp) {
ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &buffer,
ORTE_RML_TAG_INIT_ROUTES,
orte_routed_base_process_msg);

@ -1923,7 +1923,7 @@ static void check_proc_complete(orte_odls_child_t *child)
/* if we are the HNP, then we would rather not send this to ourselves -
* instead, we queue it up for local processing
*/
if (orte_process_info.hnp) {
if (orte_proc_info.hnp) {
ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &alert,
ORTE_RML_TAG_PLM,
orte_plm_base_receive_process_msg);

@ -1992,7 +1992,7 @@ static void check_proc_complete(orte_odls_child_t *child)
/* if we are the HNP, then we would rather not send this to ourselves -
* instead, we queue it up for local processing
*/
if (orte_process_info.hnp) {
if (orte_proc_info.hnp) {
ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &alert,
ORTE_RML_TAG_PLM,
orte_plm_base_receive_process_msg);

@ -2142,8 +2142,8 @@ GOTCHILD:
free(job);
goto MOVEON;
}
abort_file = opal_os_path(false, orte_process_info.tmpdir_base,
orte_process_info.top_session_dir,
abort_file = opal_os_path(false, orte_proc_info.tmpdir_base,
orte_proc_info.top_session_dir,
job, vpid, "abort", NULL );
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls:waitpid_fired checking abort file %s",

@ -2431,7 +2431,7 @@ int orte_odls_base_default_kill_local_procs(orte_jobid_t job, bool set_state,
if (0 != (err = kill_local(child->pid, SIGTERM))) {
orte_show_help("help-odls-default.txt",
"odls-default:could-not-send-kill",
true, orte_process_info.nodename, child->pid, err);
true, orte_proc_info.nodename, child->pid, err);
/* check the proc state - ensure it is in one of the termination
* states so that we properly wakeup
*/

@ -2457,7 +2457,7 @@ int orte_odls_base_default_kill_local_procs(orte_jobid_t job, bool set_state,
if (!child_died(child->pid, orte_odls_globals.timeout_before_sigkill, &exit_status)) {
orte_show_help("help-odls-default.txt",
"odls-default:could-not-kill",
true, orte_process_info.nodename, child->pid);
true, orte_proc_info.nodename, child->pid);
}
}
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,

@ -2486,7 +2486,7 @@ RECORD:
/* if we are the HNP, then we would rather not send this to ourselves -
* instead, we queue it up for local processing
*/
if (orte_process_info.hnp) {
if (orte_proc_info.hnp) {
ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &alert,
ORTE_RML_TAG_PLM,
orte_plm_base_receive_process_msg);

@ -2538,10 +2538,10 @@ int orte_odls_base_get_proc_stats(opal_buffer_t *answer,

OBJ_CONSTRUCT(&stats, opal_pstats_t);
/* record node up to first '.' */
for (j=0; j < (int)strlen(orte_process_info.nodename) &&
for (j=0; j < (int)strlen(orte_proc_info.nodename) &&
j < OPAL_PSTAT_MAX_STRING_LEN-1 &&
orte_process_info.nodename[j] != '.'; j++) {
stats.node[j] = orte_process_info.nodename[j];
orte_proc_info.nodename[j] != '.'; j++) {
stats.node[j] = orte_proc_info.nodename[j];
}
/* record rank */
stats.rank = child->name->vpid;
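
Illustrative sketch (not part of the commit): the stats hunk above copies the node name only up to the first '.', stripping the domain while also honoring the destination buffer size. The same loop in runnable form, with a placeholder hostname and a stand-in for OPAL_PSTAT_MAX_STRING_LEN:

    #include <stdio.h>
    #include <string.h>

    #define MAX_STRING_LEN 32   /* stand-in for OPAL_PSTAT_MAX_STRING_LEN */

    int main(void)
    {
        const char *nodename = "node042.cluster.example.org";   /* placeholder */
        char node[MAX_STRING_LEN] = "";

        for (int j = 0; j < (int)strlen(nodename) &&
                        j < MAX_STRING_LEN - 1 &&
                        nodename[j] != '.'; j++) {
            node[j] = nodename[j];
        }
        printf("%s\n", node);   /* prints "node042" */
        return 0;
    }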

@ -70,7 +70,7 @@ int orte_odls_base_preload_files_app_context(orte_app_context_t* app_context)

/* Define the process set */
p_set = OBJ_NEW(orte_filem_base_process_set_t);
if( orte_process_info.hnp ) {
if( orte_proc_info.hnp ) {
/* if I am the HNP, then use me as the source */
p_set->source.jobid = ORTE_PROC_MY_NAME->jobid;
p_set->source.vpid = ORTE_PROC_MY_NAME->vpid;

@ -152,7 +152,7 @@ static int orte_odls_base_preload_append_binary(orte_app_context_t* context,
f_set = OBJ_NEW(orte_filem_base_file_set_t);

/* Local Placement */
asprintf(&local_bin, "%s/%s", orte_process_info.job_session_dir, opal_basename(context->app));
asprintf(&local_bin, "%s/%s", orte_proc_info.job_session_dir, opal_basename(context->app));
if(orte_odls_base_is_preload_local_dup(local_bin, filem_request) ) {
goto cleanup;
}

@ -222,7 +222,7 @@ static int orte_odls_base_preload_append_files(orte_app_context_t* context,
}

/* If this is the HNP, then source = sink, so use the same path for each local and remote */
if( orte_process_info.hnp ) {
if( orte_proc_info.hnp ) {
free(remote_targets[i]);
remote_targets[i] = strdup(local_ref);
}

@ -136,7 +136,7 @@ static char *
false, false, NULL, &user);

if (0 > asprintf(&frontend, OPAL_PATH_SEP"%s"OPAL_PATH_SEP"openmpi-bproc-%s",
orte_process_info.tmpdir_base, user)) {
orte_proc_info.tmpdir_base, user)) {
ORTE_ERROR_LOG(ORTE_ERROR);
path = NULL;
}

@ -524,7 +524,7 @@ int orte_odls_bproc_finalize(void)
{
orte_iof.iof_flush();
odls_bproc_remove_dir();
orte_session_dir_finalize(orte_process_info.my_name);
orte_session_dir_finalize(orte_proc_info.my_name);
return ORTE_SUCCESS;
}

@ -546,14 +546,14 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t
port in the range. Otherwise, tcp_port_min will be 0, which
means "pick any port" */
if (AF_INET == af_family) {
if (orte_process_info.daemon) {
if (orte_proc_info.daemon) {
/* if static ports were provided, the daemon takes the
* first entry in the list - otherwise, we "pick any port"
*/
if (NULL != mca_oob_tcp_component.tcp4_static_ports) {
port = strtol(mca_oob_tcp_component.tcp4_static_ports[0], NULL, 10);
/* save the port for later use */
orte_process_info.my_port = port;
orte_proc_info.my_port = port;
/* convert it to network-byte-order */
port = htons(port);
/* flag that we are using static ports */

@ -562,7 +562,7 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t
port = 0;
orte_static_ports = false;
}
} else if (orte_process_info.mpi_proc) {
} else if (orte_proc_info.mpi_proc) {
/* if static ports were provided, an mpi proc takes its
* node_local_rank entry in the list IF it has that info
* AND enough ports were provided - otherwise, we "pick any port"

@ -575,7 +575,7 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t
/* any daemon takes the first entry, so we start with the second */
port = strtol(mca_oob_tcp_component.tcp4_static_ports[nrank+1], NULL, 10);
/* save the port for later use */
orte_process_info.my_port = port;
orte_proc_info.my_port = port;
/* convert it to network-byte-order */
port = htons(port);
/* flag that we are using static ports */

@ -599,14 +599,14 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t

#if OPAL_WANT_IPV6
if (AF_INET6 == af_family) {
if (orte_process_info.daemon) {
if (orte_proc_info.daemon) {
/* if static ports were provided, the daemon takes the
* first entry in the list - otherwise, we "pick any port"
*/
if (NULL != mca_oob_tcp_component.tcp6_static_ports) {
port = strtol(mca_oob_tcp_component.tcp6_static_ports[0], NULL, 10);
/* save the port for later use */
orte_process_info.my_port = port;
orte_proc_info.my_port = port;
/* convert it to network-byte-order */
port = htons(port);
/* flag that we are using static ports */

@ -615,7 +615,7 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t
port = 0;
orte_static_ports = false;
}
} else if (orte_process_info.mpi_proc) {
} else if (orte_proc_info.mpi_proc) {
/* if static ports were provided, an mpi proc takes its
* node_local_rank entry in the list IF it has that info
* AND enough ports were provided - otherwise, we "pick any port"

@ -628,7 +628,7 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t
/* any daemon takes the first entry, so we start with the second */
port = strtol(mca_oob_tcp_component.tcp6_static_ports[nrank+1], NULL, 10);
/* save the port for later use */
orte_process_info.my_port = port;
orte_proc_info.my_port = port;
/* convert it to network-byte-order */
port = htons(port);
/* flag that we are using static ports */
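
Illustrative sketch (not part of the commit): the IPv4 and IPv6 branches above encode a single rule - given a static port list, a daemon takes entry 0 and an MPI proc takes the entry at its node-local rank plus one; with no list, or not enough entries, port 0 means "pick any port". A self-contained model of that rule with placeholder port values:

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    static int pick_port(bool daemon, bool mpi_proc, int node_local_rank,
                         char **static_ports, int nports)
    {
        if (NULL == static_ports) {
            return 0;                                    /* "pick any port" */
        }
        if (daemon) {
            return (int)strtol(static_ports[0], NULL, 10);
        }
        if (mpi_proc && node_local_rank + 1 < nports) {
            return (int)strtol(static_ports[node_local_rank + 1], NULL, 10);
        }
        return 0;
    }

    int main(void)
    {
        char *ports[] = { "10000", "10001", "10002" };   /* placeholders */
        printf("daemon: %d\n", pick_port(true, false, 0, ports, 3));
        printf("rank 1: %d\n", pick_port(false, true, 1, ports, 3));
        return 0;
    }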

@ -701,7 +701,7 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t
/* if we dynamically assigned the port, save it here,
* remembering to convert it back from network byte order first
*/
orte_process_info.my_port = ntohs(*target_port);
orte_proc_info.my_port = ntohs(*target_port);
}

/* setup listen backlog to maximum allowed by kernel */

@ -1368,7 +1368,7 @@ int mca_oob_tcp_init(void)
{
orte_jobid_t jobid;
int rc;
int randval = orte_process_info.num_procs;
int randval = orte_proc_info.num_procs;

if (0 == randval) randval = 10;

@ -1387,10 +1387,10 @@ int mca_oob_tcp_init(void)
jobid = ORTE_PROC_MY_NAME->jobid;

/* Fix up the listen type. This is the first call into the OOB in
which the orte_process_info.hnp field is reliably set. The
which the orte_proc_info.hnp field is reliably set. The
listen_mode should only be listen_thread for the HNP -- all
others should use the traditional event library. */
if (!orte_process_info.hnp) {
if (!orte_proc_info.hnp) {
mca_oob_tcp_component.tcp_listen_type = OOB_TCP_EVENT;
}

@ -478,7 +478,7 @@ static void mca_oob_tcp_msg_data(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pee
* another job family - procs don't need to do this because
* they always route through their daemons anyway
*/
if (!orte_process_info.mpi_proc) {
if (!orte_proc_info.mpi_proc) {
if ((ORTE_JOB_FAMILY(msg->msg_hdr.msg_origin.jobid) !=
ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) &&
(0 != ORTE_JOB_FAMILY(msg->msg_hdr.msg_origin.jobid))) {

@ -39,7 +39,7 @@ int orte_plm_base_finalize(void)
orte_plm.finalize();

/* if we are the HNP, then stop our receive */
if (orte_process_info.hnp) {
if (orte_proc_info.hnp) {
if (ORTE_SUCCESS != (rc = orte_plm_base_comm_stop())) {
ORTE_ERROR_LOG(rc);
return rc;

@ -41,9 +41,9 @@ int orte_plm_base_set_hnp_name(void)
uint32_t bias;

/* hash the nodename */
OPAL_HASH_STR(orte_process_info.nodename, hash32);
OPAL_HASH_STR(orte_proc_info.nodename, hash32);

bias = (uint32_t)orte_process_info.pid;
bias = (uint32_t)orte_proc_info.pid;

OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"plm:base:set_hnp_name: initial bias %ld nodename hash %lu",

@ -151,7 +151,7 @@ int orte_plm_base_setup_job(orte_job_t *jdata)
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
if (orte_process_info.num_procs != jdatorted->num_procs) {
if (orte_proc_info.num_procs != jdatorted->num_procs) {
/* more daemons are being launched - update the routing tree to
* ensure that the HNP knows how to route messages via
* the daemon routing tree - this needs to be done

@ -159,7 +159,7 @@ int orte_plm_base_setup_job(orte_job_t *jdata)
* hasn't unpacked its launch message prior to being
* asked to communicate.
*/
orte_process_info.num_procs = jdatorted->num_procs;
orte_proc_info.num_procs = jdatorted->num_procs;
if (ORTE_SUCCESS != (rc = orte_routed.update_routing_tree())) {
ORTE_ERROR_LOG(rc);
return rc;

@ -1012,11 +1012,11 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
}

/* pass the total number of daemons that will be in the system */
if (orte_process_info.hnp) {
if (orte_proc_info.hnp) {
jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
num_procs = jdata->num_procs;
} else {
num_procs = orte_process_info.num_procs;
num_procs = orte_proc_info.num_procs;
}
opal_argv_append(argc, argv, "-mca");
opal_argv_append(argc, argv, "orte_ess_num_procs");

@ -1025,10 +1025,10 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
free(param);

/* pass the uri of the hnp */
if (orte_process_info.hnp) {
if (orte_proc_info.hnp) {
rml_uri = orte_rml.get_contact_info();
} else {
rml_uri = orte_process_info.my_hnp_uri;
rml_uri = orte_proc_info.my_hnp_uri;
}
asprintf(&param, "\"%s\"", rml_uri);
opal_argv_append(argc, argv, "--hnp-uri");

@ -1039,7 +1039,7 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
* being sure to "purge" any that would cause problems
* on backend nodes
*/
if (orte_process_info.hnp) {
if (orte_proc_info.hnp) {
cnt = opal_argv_count(orted_cmd_line);
for (i=0; i < cnt; i+=3) {
/* if the specified option is more than one word, we don't

@ -278,7 +278,7 @@ int orte_plm_base_orted_kill_local_procs(orte_jobid_t job)
* fire right away, but that's okay
* The macro makes a copy of the buffer, so it's okay to release it here
*/
if (orte_process_info.hnp) {
if (orte_proc_info.hnp) {
ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &cmd, ORTE_RML_TAG_DAEMON, orte_daemon_cmd_processor);
}

@ -312,7 +312,7 @@ void orte_plm_base_receive_process_msg(int fd, short event, void *data)
OBJ_DESTRUCT(&answer);

/* see if an error occurred - if so, wakeup the HNP so we can exit */
if (orte_process_info.hnp && ORTE_SUCCESS != rc) {
if (orte_proc_info.hnp && ORTE_SUCCESS != rc) {
orte_trigger_event(&orte_exit);
}
}

@ -186,7 +186,7 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
OBJ_DESTRUCT(&hosts);

/* is this a local operation? */
if (0 == strcmp(orte_process_info.nodename, nodename)) {
if (0 == strcmp(orte_proc_info.nodename, nodename)) {
local_op = true;
}

@ -456,7 +456,7 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
* required to pass existence tests
*/
param = mca_base_param_environ_variable("orte","hnp","uri");
asprintf(&path, "\"%s\"", orte_process_info.my_hnp_uri);
asprintf(&path, "\"%s\"", orte_proc_info.my_hnp_uri);
opal_setenv(param, path, true, &argv);
free(param);
free(path);

@ -53,7 +53,7 @@ int orte_plm_base_select(void)
* If we didn't find one, and we are a daemon, then default to retaining the proxy.
* Otherwise, if we didn't find one to select, that is unacceptable.
*/
if (orte_process_info.daemon) {
if (orte_proc_info.daemon) {
/* don't record a selected component or flag selected
* so we finalize correctly - just leave the plm alone
* as it defaults to pointing at the proxy

@ -356,12 +356,12 @@ static void orte_plm_bproc_setup_env(char *** env)
}

/* ns replica contact info */
if(NULL == orte_process_info.ns_replica) {
orte_dss.copy((void**)&orte_process_info.ns_replica, orte_process_info.my_name, ORTE_NAME);
orte_process_info.ns_replica_uri = orte_rml.get_uri();
if(NULL == orte_proc_info.ns_replica) {
orte_dss.copy((void**)&orte_proc_info.ns_replica, orte_proc_info.my_name, ORTE_NAME);
orte_proc_info.ns_replica_uri = orte_rml.get_uri();
}
var = mca_base_param_environ_variable("ns","replica","uri");
opal_setenv(var,orte_process_info.ns_replica_uri, true, env);
opal_setenv(var,orte_proc_info.ns_replica_uri, true, env);
free(var);

/* make sure the username used to create the bproc directory is the same on

@ -371,12 +371,12 @@ static void orte_plm_bproc_setup_env(char *** env)
free(var);

/* gpr replica contact info */
if(NULL == orte_process_info.gpr_replica) {
orte_dss.copy((void**)&orte_process_info.gpr_replica, orte_process_info.my_name, ORTE_NAME);
orte_process_info.gpr_replica_uri = orte_rml.get_uri();
if(NULL == orte_proc_info.gpr_replica) {
orte_dss.copy((void**)&orte_proc_info.gpr_replica, orte_proc_info.my_name, ORTE_NAME);
orte_proc_info.gpr_replica_uri = orte_rml.get_uri();
}
var = mca_base_param_environ_variable("gpr","replica","uri");
opal_setenv(var,orte_process_info.gpr_replica_uri, true, env);
opal_setenv(var,orte_proc_info.gpr_replica_uri, true, env);
free(var);

/* universe directory - needs to match orted */

@ -106,7 +106,7 @@ static int orte_smr_bproc_open(void)

static orte_smr_base_module_t* orte_smr_bproc_init(int *priority)
{
if (!orte_process_info.seed) {
if (!orte_proc_info.seed) {
return NULL;
}

@ -144,7 +144,7 @@ static int orte_plm_ccp_component_query(mca_base_module_t **module, int *priorit
}

/* if we are NOT an HNP, then don't select us */
if (!orte_process_info.hnp) {
if (!orte_proc_info.hnp) {
pCluster->Release();
*module = NULL;
return ORTE_ERROR;

@ -302,7 +302,7 @@ static void orte_plm_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
/* if we are not the HNP, send a message to the HNP alerting it
* to the failure
*/
if (!orte_process_info.hnp) {
if (!orte_proc_info.hnp) {
opal_buffer_t buf;
orte_vpid_t *vpid=(orte_vpid_t*)cbdata;
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,

@ -672,7 +672,7 @@ static int setup_launch(int *argcptr, char ***argvptr,
* by enclosing them in quotes. Check for any multi-word
* mca params passed to mpirun and include them
*/
if (orte_process_info.hnp) {
if (orte_proc_info.hnp) {
int cnt, i;
cnt = opal_argv_count(orted_cmd_line);
for (i=0; i < cnt; i+=3) {

@ -852,7 +852,7 @@ static int remote_spawn(opal_buffer_t *launch)
OBJ_RELEASE(item);
}
/* reconstruct the child list */
find_children(0, 0, ORTE_PROC_MY_NAME->vpid, orte_process_info.num_procs);
find_children(0, 0, ORTE_PROC_MY_NAME->vpid, orte_proc_info.num_procs);

/* if I have no children, just return */
if (opal_list_is_empty(&mca_plm_rsh_component.children)) {

@ -865,7 +865,7 @@ static int remote_spawn(opal_buffer_t *launch)
}

/* setup the launch */
if (ORTE_SUCCESS != (rc = setup_launch(&argc, &argv, orte_process_info.nodename, &node_name_index1,
if (ORTE_SUCCESS != (rc = setup_launch(&argc, &argv, orte_proc_info.nodename, &node_name_index1,
&proc_vpid_index, prefix))) {
ORTE_ERROR_LOG(rc);
goto cleanup;

@ -647,7 +647,7 @@ int orte_plm_submit_launch(orte_job_t *jdata)
* match, check using ifislocal().
*/
if (!mca_plm_submit_component.force_submit &&
(0 == strcmp(nodes[nnode]->name, orte_process_info.nodename) ||
(0 == strcmp(nodes[nnode]->name, orte_proc_info.nodename) ||
opal_ifislocal(nodes[nnode]->name))) {
if (mca_plm_submit_component.debug) {
opal_output(0, "plm:submit: %s is a LOCAL node\n",

@ -86,7 +86,7 @@ static int ras_alps_open(void)
static int orte_ras_alps_component_query(mca_base_module_t **module, int *priority)
{
/* if we are not an HNP, then we must not be selected */
if (!orte_process_info.hnp) {
if (!orte_proc_info.hnp) {
*module = NULL;
return ORTE_ERROR;
}

@ -288,10 +288,10 @@ int orte_ras_base_allocate(orte_job_t *jdata)
OBJ_DESTRUCT(&nodes);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* use the same name we got in orte_process_info so we avoid confusion in
/* use the same name we got in orte_proc_info so we avoid confusion in
* the session directories
*/
node->name = strdup(orte_process_info.nodename);
node->name = strdup(orte_proc_info.nodename);
node->state = ORTE_NODE_STATE_UP;
node->slots_inuse = 0;
node->slots_max = 0;

@ -105,7 +105,7 @@ static int orte_ras_ccp_component_query(mca_base_module_t **module, int *priorit
}

/* if we are NOT an HNP, then don't select us */
if (!orte_process_info.hnp) {
if (!orte_proc_info.hnp) {
pCluster->Release();
*module = NULL;
return ORTE_ERROR;

@ -131,9 +131,9 @@ int orte_rml_base_update_contact_info(opal_buffer_t* data)
* in our process_info struct so we can correctly route any messages
*/
if (ORTE_PROC_MY_NAME->jobid == name.jobid &&
orte_process_info.daemon &&
orte_process_info.num_procs < num_procs) {
orte_process_info.num_procs = num_procs;
orte_proc_info.daemon &&
orte_proc_info.num_procs < num_procs) {
orte_proc_info.num_procs = num_procs;
/* if we changed it, then we better update the routed
* tree so daemon collectives work correctly
*/

@ -57,7 +57,7 @@ int orte_routed_base_comm_start(void)
{
int rc;

if (recv_issued || !orte_process_info.hnp) {
if (recv_issued || !orte_proc_info.hnp) {
return ORTE_SUCCESS;
}

@ -83,7 +83,7 @@ int orte_routed_base_comm_stop(void)
{
int rc;

if (!recv_issued || !orte_process_info.hnp) {
if (!recv_issued || !orte_proc_info.hnp) {
return ORTE_SUCCESS;
}

@ -37,7 +37,7 @@ static void report_sync(int status, orte_process_name_t* sender,
orte_rml_tag_t tag, void *cbdata)
{
/* just copy the payload to the sync_buf */
opal_dss.copy_payload(orte_process_info.sync_buf, buffer);
opal_dss.copy_payload(orte_proc_info.sync_buf, buffer);
/* flag as complete */
sync_recvd = true;
}

@ -112,9 +112,9 @@ static int finalize(void)
/* if I am an application process, indicate that I am
* truly finalizing prior to departure
*/
if (!orte_process_info.hnp &&
!orte_process_info.daemon &&
!orte_process_info.tool) {
if (!orte_proc_info.hnp &&
!orte_proc_info.daemon &&
!orte_proc_info.tool) {
if (ORTE_SUCCESS != (rc = orte_routed_base_register_sync(false))) {
ORTE_ERROR_LOG(rc);
return rc;

@ -122,7 +122,7 @@ static int finalize(void)
}

/* if I am the HNP, I need to stop the comm recv */
if (orte_process_info.hnp) {
if (orte_proc_info.hnp) {
orte_routed_base_comm_stop();
}

@ -156,8 +156,8 @@ static int delete_route(orte_process_name_t *proc)
/* if I am an application process, I don't have any routes
* so there is nothing for me to do
*/
if (!orte_process_info.hnp && !orte_process_info.daemon &&
!orte_process_info.tool) {
if (!orte_proc_info.hnp && !orte_proc_info.daemon &&
!orte_proc_info.tool) {
return ORTE_SUCCESS;
}

@ -177,7 +177,7 @@ static int delete_route(orte_process_name_t *proc)
* in my routing table and thus have nothing to do
* here, just return
*/
if (orte_process_info.daemon) {
if (orte_proc_info.daemon) {
return ORTE_SUCCESS;
}

@ -224,8 +224,8 @@ static int update_route(orte_process_name_t *target,
/* if I am an application process, we don't update the route since
* we automatically route everything through the local daemon
*/
if (!orte_process_info.hnp && !orte_process_info.daemon &&
!orte_process_info.tool) {
if (!orte_proc_info.hnp && !orte_proc_info.daemon &&
!orte_proc_info.tool) {
return ORTE_SUCCESS;
}

@ -252,7 +252,7 @@ static int update_route(orte_process_name_t *target,
* anything to this job family via my HNP - so nothing to do
* here, just return
*/
if (orte_process_info.daemon) {
if (orte_proc_info.daemon) {
return ORTE_SUCCESS;
}

@ -318,8 +318,8 @@ static orte_process_name_t get_route(orte_process_name_t *target)
}

/* if I am an application process, always route via my local daemon */
if (!orte_process_info.hnp && !orte_process_info.daemon &&
!orte_process_info.tool) {
if (!orte_proc_info.hnp && !orte_proc_info.daemon &&
!orte_proc_info.tool) {
ret = ORTE_PROC_MY_DAEMON;
goto found;
}

@ -337,7 +337,7 @@ static orte_process_name_t get_route(orte_process_name_t *target)
/* IF THIS IS FOR A DIFFERENT JOB FAMILY... */
if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
/* if I am a daemon, route this via the HNP */
if (orte_process_info.daemon) {
if (orte_proc_info.daemon) {
ret = ORTE_PROC_MY_HNP;
goto found;
}

@ -498,7 +498,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
int rc;

/* if I am a tool, then I stand alone - there is nothing to do */
if (orte_process_info.tool) {
if (orte_proc_info.tool) {
return ORTE_SUCCESS;
}

@ -506,31 +506,31 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
* from the data sent to me for launch and update the routing tables to
* point at the daemon for each proc
*/
if (orte_process_info.daemon) {
if (orte_proc_info.daemon) {

OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
"%s routed_binomial: init routes for daemon job %s\n\thnp_uri %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(job),
(NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri));
(NULL == orte_proc_info.my_hnp_uri) ? "NULL" : orte_proc_info.my_hnp_uri));

if (NULL == ndat) {
/* indicates this is being called during orte_init.
* Get the HNP's name for possible later use
*/
if (NULL == orte_process_info.my_hnp_uri) {
if (NULL == orte_proc_info.my_hnp_uri) {
/* fatal error */
ORTE_ERROR_LOG(ORTE_ERR_FATAL);
return ORTE_ERR_FATAL;
}
/* set the contact info into the hash table */
if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_process_info.my_hnp_uri))) {
if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_proc_info.my_hnp_uri))) {
ORTE_ERROR_LOG(rc);
return(rc);
}

/* extract the hnp name and store it */
if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_hnp_uri,
ORTE_PROC_MY_HNP, NULL))) {
ORTE_ERROR_LOG(rc);
return rc;

@ -561,7 +561,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
}

if (orte_process_info.hnp) {
if (orte_proc_info.hnp) {

OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
"%s routed_binomial: init routes for HNP job %s",

@ -669,10 +669,10 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
"%s routed_binomial: init routes for proc job %s\n\thnp_uri %s\n\tdaemon uri %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job),
(NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri,
(NULL == orte_process_info.my_daemon_uri) ? "NULL" : orte_process_info.my_daemon_uri));
(NULL == orte_proc_info.my_hnp_uri) ? "NULL" : orte_proc_info.my_hnp_uri,
(NULL == orte_proc_info.my_daemon_uri) ? "NULL" : orte_proc_info.my_daemon_uri));

if (NULL == orte_process_info.my_daemon_uri) {
if (NULL == orte_proc_info.my_daemon_uri) {
/* in this module, we absolutely MUST have this information - if
* we didn't get it, then error out
*/

@ -691,7 +691,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
* to it. This is required to ensure that we -do- send messages to the correct
* HNP name
*/
if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_hnp_uri,
ORTE_PROC_MY_HNP, NULL))) {
ORTE_ERROR_LOG(rc);
return rc;

@ -701,12 +701,12 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
* the connection, but just tells the RML how to reach the daemon
* if/when we attempt to send to it
*/
if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_process_info.my_daemon_uri))) {
if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_proc_info.my_daemon_uri))) {
ORTE_ERROR_LOG(rc);
return(rc);
}
/* extract the daemon's name so we can update the routing table */
if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_daemon_uri,
if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_daemon_uri,
ORTE_PROC_MY_DAEMON, NULL))) {
ORTE_ERROR_LOG(rc);
return rc;

@ -854,7 +854,7 @@ static int update_routing_tree(void)
/* if I am anything other than a daemon or the HNP, this
* is a meaningless command as I am not allowed to route
*/
if (!orte_process_info.daemon && !orte_process_info.hnp) {
if (!orte_proc_info.daemon && !orte_proc_info.hnp) {
return ORTE_ERR_NOT_SUPPORTED;
}

@ -868,7 +868,7 @@ static int update_routing_tree(void)
* lie underneath their branch
*/
my_parent.vpid = binomial_tree(0, 0, ORTE_PROC_MY_NAME->vpid,
orte_process_info.num_procs,
orte_proc_info.num_procs,
&num_children, &my_children, NULL);

if (0 < opal_output_get_verbosity(orte_routed_base_output)) {

@ -878,7 +878,7 @@ static int update_routing_tree(void)
item = opal_list_get_next(item)) {
child = (orte_routed_tree_t*)item;
opal_output(0, "%s: \tchild %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), child->vpid);
for (j=0; j < (int)orte_process_info.num_procs; j++) {
for (j=0; j < (int)orte_proc_info.num_procs; j++) {
if (opal_bitmap_is_set_bit(&child->relatives, j)) {
opal_output(0, "%s: \t\trelation %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j);
}
|
||||
@ -897,7 +897,7 @@ static orte_vpid_t get_routing_tree(opal_list_t *children)
|
||||
/* if I am anything other than a daemon or the HNP, this
|
||||
* is a meaningless command as I am not allowed to route
|
||||
*/
|
||||
if (!orte_process_info.daemon && !orte_process_info.hnp) {
|
||||
if (!orte_proc_info.daemon && !orte_proc_info.hnp) {
|
||||
return ORTE_VPID_INVALID;
|
||||
}
|
||||
|
||||
@ -928,7 +928,7 @@ static int get_wireup_info(opal_buffer_t *buf)
|
||||
* is a meaningless command as I cannot get
|
||||
* the requested info
|
||||
*/
|
||||
if (!orte_process_info.hnp) {
|
||||
if (!orte_proc_info.hnp) {
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
|
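The hunks above are from the binomial routed module: each daemon derives its parent and children from its own vpid and orte_proc_info.num_procs via the binomial_tree() helper. As an aside, here is a minimal, self-contained sketch of the conventional binomial-tree numbering over vpids 0..num_procs-1; it is an illustration only, not the module's actual helper, which additionally fills an opal_bitmap of each child's relatives.

/* Illustrative sketch (not the module's code): parent/children of a
 * rank in a binomial tree rooted at 0, the shape the binomial routed
 * module builds over the daemon vpids. */
#include <stdio.h>

static int binomial_parent(int rank)
{
    /* clearing the lowest set bit yields the parent; the root has none */
    return (0 == rank) ? -1 : (rank & (rank - 1));
}

static void binomial_children(int rank, int num_procs)
{
    /* children are rank + 2^k for every 2^k below rank's lowest set bit */
    int low = (0 == rank) ? num_procs : (rank & -rank);
    int bit;
    for (bit = 1; bit < low && rank + bit < num_procs; bit <<= 1) {
        printf("child of %d: %d\n", rank, rank + bit);
    }
}

int main(void)
{
    int num_procs = 8, r;
    for (r = 0; r < num_procs; r++) {
        printf("rank %d -> parent %d\n", r, binomial_parent(r));
        binomial_children(r, num_procs);
    }
    return 0;
}

For eight vpids this yields the parent links 1->0, 2->0, 3->2, 4->0, 5->4, 6->4, 7->6, the kind of tree the module logs when orte_routed_base_output verbosity is raised.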
@ -103,9 +103,9 @@ static int finalize(void)
    /* if I am an application process, indicate that I am
     * truly finalizing prior to departure
     */
    if (!orte_process_info.hnp &&
        !orte_process_info.daemon &&
        !orte_process_info.tool) {
    if (!orte_proc_info.hnp &&
        !orte_proc_info.daemon &&
        !orte_proc_info.tool) {
        if (ORTE_SUCCESS != (rc = orte_routed_base_register_sync(false))) {
            ORTE_ERROR_LOG(rc);
            return rc;
@ -113,7 +113,7 @@ static int finalize(void)
    }

    /* if I am the HNP, I need to stop the comm recv */
    if (orte_process_info.hnp) {
    if (orte_proc_info.hnp) {
        orte_routed_base_comm_stop();
    }

@ -140,8 +140,8 @@ static int delete_route(orte_process_name_t *proc)
    /* if I am an application process, I don't have any routes
     * so there is nothing for me to do
     */
    if (!orte_process_info.hnp && !orte_process_info.daemon &&
        !orte_process_info.tool) {
    if (!orte_proc_info.hnp && !orte_proc_info.daemon &&
        !orte_proc_info.tool) {
        return ORTE_SUCCESS;
    }

@ -161,7 +161,7 @@ static int delete_route(orte_process_name_t *proc)
     * in my routing table and thus have nothing to do
     * here, just return
     */
    if (orte_process_info.daemon) {
    if (orte_proc_info.daemon) {
        return ORTE_SUCCESS;
    }

@ -208,8 +208,8 @@ static int update_route(orte_process_name_t *target,
    /* if I am an application process, we don't update the route since
     * we automatically route everything through the local daemon
     */
    if (!orte_process_info.hnp && !orte_process_info.daemon &&
        !orte_process_info.tool) {
    if (!orte_proc_info.hnp && !orte_proc_info.daemon &&
        !orte_proc_info.tool) {
        return ORTE_SUCCESS;
    }

@ -236,7 +236,7 @@ static int update_route(orte_process_name_t *target,
     * anything to this job family via my HNP - so nothing to do
     * here, just return
     */
    if (orte_process_info.daemon) {
    if (orte_proc_info.daemon) {
        return ORTE_SUCCESS;
    }

@ -296,8 +296,8 @@ static orte_process_name_t get_route(orte_process_name_t *target)
    }

    /* if I am an application process, always route via my local daemon */
    if (!orte_process_info.hnp && !orte_process_info.daemon &&
        !orte_process_info.tool) {
    if (!orte_proc_info.hnp && !orte_proc_info.daemon &&
        !orte_proc_info.tool) {
        ret = ORTE_PROC_MY_DAEMON;
        goto found;
    }
@ -315,7 +315,7 @@ static orte_process_name_t get_route(orte_process_name_t *target)
    /* IF THIS IS FOR A DIFFERENT JOB FAMILY... */
    if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
        /* if I am a daemon, route this via the HNP */
        if (orte_process_info.daemon) {
        if (orte_proc_info.daemon) {
            ret = ORTE_PROC_MY_HNP;
            goto found;
        }
@ -368,7 +368,7 @@ static orte_process_name_t get_route(orte_process_name_t *target)
        daemon.vpid = ORTE_PROC_MY_NAME->vpid - 1;
        ret = &daemon;
    } else {
        if (ORTE_PROC_MY_NAME->vpid < orte_process_info.num_procs-1) {
        if (ORTE_PROC_MY_NAME->vpid < orte_proc_info.num_procs-1) {
            daemon.vpid = ORTE_PROC_MY_NAME->vpid + 1;
        } else {
            /* we are at end of chain - wrap around */
@ -493,7 +493,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
    int rc;

    /* if I am a tool, then I stand alone - there is nothing to do */
    if (orte_process_info.tool) {
    if (orte_proc_info.tool) {
        return ORTE_SUCCESS;
    }

@ -501,31 +501,31 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
     * from the data sent to me for launch and update the routing tables to
     * point at the daemon for each proc
     */
    if (orte_process_info.daemon) {
    if (orte_proc_info.daemon) {

        OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
                             "%s routed_linear: init routes for daemon job %s\n\thnp_uri %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(job),
                             (NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri));
                             (NULL == orte_proc_info.my_hnp_uri) ? "NULL" : orte_proc_info.my_hnp_uri));

        if (NULL == ndat) {
            /* indicates this is being called during orte_init.
             * Get the HNP's name for possible later use
             */
            if (NULL == orte_process_info.my_hnp_uri) {
            if (NULL == orte_proc_info.my_hnp_uri) {
                /* fatal error */
                ORTE_ERROR_LOG(ORTE_ERR_FATAL);
                return ORTE_ERR_FATAL;
            }
            /* set the contact info into the hash table */
            if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_process_info.my_hnp_uri))) {
            if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_proc_info.my_hnp_uri))) {
                ORTE_ERROR_LOG(rc);
                return(rc);
            }

            /* extract the hnp name and store it */
            if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
            if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_hnp_uri,
                                                               ORTE_PROC_MY_HNP, NULL))) {
                ORTE_ERROR_LOG(rc);
                return rc;
@ -556,7 +556,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
    }

    if (orte_process_info.hnp) {
    if (orte_proc_info.hnp) {

        OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
                             "%s routed_linear: init routes for HNP job %s",
@ -664,10 +664,10 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
        OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
                             "%s routed_linear: init routes for proc job %s\n\thnp_uri %s\n\tdaemon uri %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job),
                             (NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri,
                             (NULL == orte_process_info.my_daemon_uri) ? "NULL" : orte_process_info.my_daemon_uri));
                             (NULL == orte_proc_info.my_hnp_uri) ? "NULL" : orte_proc_info.my_hnp_uri,
                             (NULL == orte_proc_info.my_daemon_uri) ? "NULL" : orte_proc_info.my_daemon_uri));

        if (NULL == orte_process_info.my_daemon_uri) {
        if (NULL == orte_proc_info.my_daemon_uri) {
            /* in this module, we absolutely MUST have this information - if
             * we didn't get it, then error out
             */
@ -686,7 +686,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
         * to it. This is required to ensure that we -do- send messages to the correct
         * HNP name
         */
        if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
        if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_hnp_uri,
                                                           ORTE_PROC_MY_HNP, NULL))) {
            ORTE_ERROR_LOG(rc);
            return rc;
@ -696,12 +696,12 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
         * the connection, but just tells the RML how to reach the daemon
         * if/when we attempt to send to it
         */
        if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_process_info.my_daemon_uri))) {
        if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_proc_info.my_daemon_uri))) {
            ORTE_ERROR_LOG(rc);
            return(rc);
        }
        /* extract the daemon's name so we can update the routing table */
        if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_daemon_uri,
        if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_daemon_uri,
                                                           ORTE_PROC_MY_DAEMON, NULL))) {
            ORTE_ERROR_LOG(rc);
            return rc;
@ -784,7 +784,7 @@ static int update_routing_tree(void)
    /* if I am anything other than a daemon or the HNP, this
     * is a meaningless command as I am not allowed to route
     */
    if (!orte_process_info.daemon && !orte_process_info.hnp) {
    if (!orte_proc_info.daemon && !orte_proc_info.hnp) {
        return ORTE_ERR_NOT_SUPPORTED;
    }

@ -800,28 +800,28 @@ static orte_vpid_t get_routing_tree(opal_list_t *children)
    /* if I am anything other than a daemon or the HNP, this
     * is a meaningless command as I am not allowed to route
     */
    if (!orte_process_info.daemon && !orte_process_info.hnp) {
    if (!orte_proc_info.daemon && !orte_proc_info.hnp) {
        return ORTE_VPID_INVALID;
    }

    /* the linear routing tree consists of a chain of daemons
     * extending from the HNP to orte_process_info.num_procs-1.
     * extending from the HNP to orte_proc_info.num_procs-1.
     * Accordingly, my child is just the my_vpid+1 daemon
     */
    if (NULL != children &&
        ORTE_PROC_MY_NAME->vpid < orte_process_info.num_procs-1) {
        ORTE_PROC_MY_NAME->vpid < orte_proc_info.num_procs-1) {
        /* my child is just the vpid+1 daemon */
        nm = OBJ_NEW(orte_routed_tree_t);
        opal_bitmap_init(&nm->relatives, orte_process_info.num_procs);
        opal_bitmap_init(&nm->relatives, orte_proc_info.num_procs);
        nm->vpid = ORTE_PROC_MY_NAME->vpid + 1;
        /* my relatives are everyone above that point */
        for (v=nm->vpid+1; v < orte_process_info.num_procs; v++) {
        for (v=nm->vpid+1; v < orte_proc_info.num_procs; v++) {
            opal_bitmap_set_bit(&nm->relatives, v);
        }
        opal_list_append(children, &nm->super);
    }

    if (orte_process_info.hnp) {
    if (orte_proc_info.hnp) {
        /* the parent of the HNP is invalid */
        return ORTE_VPID_INVALID;
    }
@ -839,7 +839,7 @@ static int get_wireup_info(opal_buffer_t *buf)
     * is a meaningless command as I cannot get
     * the requested info
     */
    if (!orte_process_info.hnp) {
    if (!orte_proc_info.hnp) {
        return ORTE_ERR_NOT_SUPPORTED;
    }
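The linear module's hunks apply the same rename to a much simpler topology: the daemons form a chain from the HNP (vpid 0) up to orte_proc_info.num_procs-1, each node's child is the vpid+1 daemon, and routing wraps around at the end of the chain. A compact sketch of that next-hop rule, as a hypothetical helper (not part of the patch), assuming ranks 0..num_procs-1:

/* Hypothetical helper: next hop along the linear daemon chain,
 * mirroring the get_route() hunk above. */
static unsigned int linear_next_hop(unsigned int me, unsigned int target,
                                    unsigned int num_procs)
{
    if (target < me) {
        return me - 1;            /* walk back down the chain toward the HNP */
    }
    if (me < num_procs - 1) {
        return me + 1;            /* walk up the chain */
    }
    return 0;                     /* we are at end of chain - wrap around */
}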
@ -113,9 +113,9 @@ static int finalize(void)
    /* if I am an application process, indicate that I am
     * truly finalizing prior to departure
     */
    if (!orte_process_info.hnp &&
        !orte_process_info.daemon &&
        !orte_process_info.tool) {
    if (!orte_proc_info.hnp &&
        !orte_proc_info.daemon &&
        !orte_proc_info.tool) {
        if (ORTE_SUCCESS != (rc = orte_routed_base_register_sync(false))) {
            ORTE_ERROR_LOG(rc);
            return rc;
@ -123,7 +123,7 @@ static int finalize(void)
    }

    /* if I am the HNP, I need to stop the comm recv */
    if (orte_process_info.hnp) {
    if (orte_proc_info.hnp) {
        orte_routed_base_comm_stop();
    }

@ -157,8 +157,8 @@ static int delete_route(orte_process_name_t *proc)
    /* if I am an application process, I don't have any routes
     * so there is nothing for me to do
     */
    if (!orte_process_info.hnp && !orte_process_info.daemon &&
        !orte_process_info.tool) {
    if (!orte_proc_info.hnp && !orte_proc_info.daemon &&
        !orte_proc_info.tool) {
        return ORTE_SUCCESS;
    }

@ -178,7 +178,7 @@ static int delete_route(orte_process_name_t *proc)
     * in my routing table and thus have nothing to do
     * here, just return
     */
    if (orte_process_info.daemon) {
    if (orte_proc_info.daemon) {
        return ORTE_SUCCESS;
    }

@ -225,8 +225,8 @@ static int update_route(orte_process_name_t *target,
    /* if I am an application process, we don't update the route since
     * we automatically route everything through the local daemon
     */
    if (!orte_process_info.hnp && !orte_process_info.daemon &&
        !orte_process_info.tool) {
    if (!orte_proc_info.hnp && !orte_proc_info.daemon &&
        !orte_proc_info.tool) {
        return ORTE_SUCCESS;
    }

@ -253,7 +253,7 @@ static int update_route(orte_process_name_t *target,
     * anything to this job family via my HNP - so nothing to do
     * here, just return
     */
    if (orte_process_info.daemon) {
    if (orte_proc_info.daemon) {
        return ORTE_SUCCESS;
    }

@ -321,8 +321,8 @@ static orte_process_name_t get_route(orte_process_name_t *target)
    }

    /* if I am an application process, always route via my local daemon */
    if (!orte_process_info.hnp && !orte_process_info.daemon &&
        !orte_process_info.tool) {
    if (!orte_proc_info.hnp && !orte_proc_info.daemon &&
        !orte_proc_info.tool) {
        ret = ORTE_PROC_MY_DAEMON;
        goto found;
    }
@ -340,7 +340,7 @@ static orte_process_name_t get_route(orte_process_name_t *target)
    /* IF THIS IS FOR A DIFFERENT JOB FAMILY... */
    if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
        /* if I am a daemon, route this via the HNP */
        if (orte_process_info.daemon) {
        if (orte_proc_info.daemon) {
            ret = ORTE_PROC_MY_HNP;
            goto found;
        }
@ -525,7 +525,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
    int rc;

    /* if I am a tool, then I stand alone - there is nothing to do */
    if (orte_process_info.tool) {
    if (orte_proc_info.tool) {
        return ORTE_SUCCESS;
    }

@ -533,31 +533,31 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
     * from the data sent to me for launch and update the routing tables to
     * point at the daemon for each proc
     */
    if (orte_process_info.daemon) {
    if (orte_proc_info.daemon) {

        OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
                             "%s routed_radix: init routes for daemon job %s\n\thnp_uri %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(job),
                             (NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri));
                             (NULL == orte_proc_info.my_hnp_uri) ? "NULL" : orte_proc_info.my_hnp_uri));

        if (NULL == ndat) {
            /* indicates this is being called during orte_init.
             * Get the HNP's name for possible later use
             */
            if (NULL == orte_process_info.my_hnp_uri) {
            if (NULL == orte_proc_info.my_hnp_uri) {
                /* fatal error */
                ORTE_ERROR_LOG(ORTE_ERR_FATAL);
                return ORTE_ERR_FATAL;
            }
            /* set the contact info into the hash table */
            if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_process_info.my_hnp_uri))) {
            if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_proc_info.my_hnp_uri))) {
                ORTE_ERROR_LOG(rc);
                return(rc);
            }

            /* extract the hnp name and store it */
            if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
            if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_hnp_uri,
                                                               ORTE_PROC_MY_HNP, NULL))) {
                ORTE_ERROR_LOG(rc);
                return rc;
@ -588,7 +588,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
    }

    if (orte_process_info.hnp) {
    if (orte_proc_info.hnp) {

        OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
                             "%s routed_radix: init routes for HNP job %s",
@ -696,10 +696,10 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
        OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
                             "%s routed_radix: init routes for proc job %s\n\thnp_uri %s\n\tdaemon uri %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job),
                             (NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri,
                             (NULL == orte_process_info.my_daemon_uri) ? "NULL" : orte_process_info.my_daemon_uri));
                             (NULL == orte_proc_info.my_hnp_uri) ? "NULL" : orte_proc_info.my_hnp_uri,
                             (NULL == orte_proc_info.my_daemon_uri) ? "NULL" : orte_proc_info.my_daemon_uri));

        if (NULL == orte_process_info.my_daemon_uri) {
        if (NULL == orte_proc_info.my_daemon_uri) {
            /* in this module, we absolutely MUST have this information - if
             * we didn't get it, then error out
             */
@ -718,7 +718,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
         * to it. This is required to ensure that we -do- send messages to the correct
         * HNP name
         */
        if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
        if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_hnp_uri,
                                                           ORTE_PROC_MY_HNP, NULL))) {
            ORTE_ERROR_LOG(rc);
            return rc;
@ -728,12 +728,12 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
         * the connection, but just tells the RML how to reach the daemon
         * if/when we attempt to send to it
         */
        if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_process_info.my_daemon_uri))) {
        if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_proc_info.my_daemon_uri))) {
            ORTE_ERROR_LOG(rc);
            return(rc);
        }
        /* extract the daemon's name so we can update the routing table */
        if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_daemon_uri,
        if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_daemon_uri,
                                                           ORTE_PROC_MY_DAEMON, NULL))) {
            ORTE_ERROR_LOG(rc);
            return rc;
@ -828,7 +828,7 @@ static void radix_tree(int rank, int *num_children,
    /* our children start at our rank + num_in_level */
    peer = rank + NInLevel;
    for (i = 0; i < mca_routed_radix_component.radix; i++) {
        if (peer < (int)orte_process_info.num_procs) {
        if (peer < (int)orte_proc_info.num_procs) {
            child = OBJ_NEW(orte_routed_tree_t);
            child->vpid = peer;
            if (NULL != children) {
@ -836,7 +836,7 @@ static void radix_tree(int rank, int *num_children,
                opal_list_append(children, &child->super);
                (*num_children)++;
                /* setup the relatives bitmap */
                opal_bitmap_init(&child->relatives, orte_process_info.num_procs);
                opal_bitmap_init(&child->relatives, orte_proc_info.num_procs);
                /* point to the relatives */
                relations = &child->relatives;
            } else {
@ -865,7 +865,7 @@ static int update_routing_tree(void)
    /* if I am anything other than a daemon or the HNP, this
     * is a meaningless command as I am not allowed to route
     */
    if (!orte_process_info.daemon && !orte_process_info.hnp) {
    if (!orte_proc_info.daemon && !orte_proc_info.hnp) {
        return ORTE_ERR_NOT_SUPPORTED;
    }

@ -909,7 +909,7 @@ static int update_routing_tree(void)
             item = opal_list_get_next(item)) {
            child = (orte_routed_tree_t*)item;
            opal_output(0, "%s: \tchild %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), child->vpid);
            for (j=0; j < (int)orte_process_info.num_procs; j++) {
            for (j=0; j < (int)orte_proc_info.num_procs; j++) {
                if (opal_bitmap_is_set_bit(&child->relatives, j)) {
                    opal_output(0, "%s: \t\trelation %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j);
                }
@ -928,7 +928,7 @@ static orte_vpid_t get_routing_tree(opal_list_t *children)
    /* if I am anything other than a daemon or the HNP, this
     * is a meaningless command as I am not allowed to route
     */
    if (!orte_process_info.daemon && !orte_process_info.hnp) {
    if (!orte_proc_info.daemon && !orte_proc_info.hnp) {
        return ORTE_VPID_INVALID;
    }

@ -958,7 +958,7 @@ static int get_wireup_info(opal_buffer_t *buf)
     * is a meaningless command as I cannot get
     * the requested info
     */
    if (!orte_process_info.hnp) {
    if (!orte_proc_info.hnp) {
        return ORTE_ERR_NOT_SUPPORTED;
    }
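In the radix module, radix_tree() lays children out level by level: a node's children start at rank + NInLevel and fan out up to mca_routed_radix_component.radix per node, skipping any vpid at or beyond orte_proc_info.num_procs. The level-based computation is only partially visible in the hunk; the sketch below uses the simpler heap-style radix-k numbering to illustrate the same bounded fan-out idea (an assumption for illustration, not the module's exact layout):

/* Simplified sketch: heap-style radix-k tree over ranks
 * 0..num_procs-1; the real radix_tree() also fills each child's
 * relatives bitmap, omitted here. */
static int radix_parent(int rank, int radix)
{
    return (0 == rank) ? -1 : (rank - 1) / radix;
}

static int radix_child(int rank, int radix, int i, int num_procs)
{
    /* i-th child (0 <= i < radix), or -1 if past the end of the tree */
    int child = rank * radix + 1 + i;
    return (child < num_procs) ? child : -1;
}

With radix 2 and eight ranks, rank 0's children are 1 and 2, rank 1's are 3 and 4, and so on, so each daemon relays to at most radix children regardless of job size.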
@ -176,10 +176,10 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
    OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
                         "%s routed_slave: init routes for proc job %s\n\thnp_uri %s\n\tdaemon uri %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job),
                         (NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri,
                         (NULL == orte_process_info.my_daemon_uri) ? "NULL" : orte_process_info.my_daemon_uri));
                         (NULL == orte_proc_info.my_hnp_uri) ? "NULL" : orte_proc_info.my_hnp_uri,
                         (NULL == orte_proc_info.my_daemon_uri) ? "NULL" : orte_proc_info.my_daemon_uri));

    if (NULL == orte_process_info.my_daemon_uri) {
    if (NULL == orte_proc_info.my_daemon_uri) {
        /* in this module, we absolutely MUST have this information - if
         * we didn't get it, then error out
         */
@ -198,7 +198,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
     * to it. This is required to ensure that we -do- send messages to the correct
     * HNP name
     */
    if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
    if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_hnp_uri,
                                                       ORTE_PROC_MY_HNP, NULL))) {
        ORTE_ERROR_LOG(rc);
        return rc;
@ -208,12 +208,12 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
     * the connection, but just tells the RML how to reach the daemon
     * if/when we attempt to send to it
     */
    if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_process_info.my_daemon_uri))) {
    if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_proc_info.my_daemon_uri))) {
        ORTE_ERROR_LOG(rc);
        return(rc);
    }
    /* extract the daemon's name so we can update the routing table */
    if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_daemon_uri,
    if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_daemon_uri,
                                                       ORTE_PROC_MY_DAEMON, NULL))) {
        ORTE_ERROR_LOG(rc);
        return rc;
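Across all four routed modules touched here (binomial, linear, radix, slave), init_routes() repeats the same three steps for an application process: verify that orte_proc_info.my_daemon_uri was provided, hand it to the RML via set_contact_info(), then parse it with orte_rml_base_parse_uris() to fill in ORTE_PROC_MY_DAEMON. Purely as a sketch, that duplicated sequence could be consolidated into a single helper; this hypothetical function is not in the patch and assumes the usual ORTE headers plus the post-patch field names:

/* Hypothetical consolidation of the daemon wire-up steps that every
 * routed module repeats in init_routes(). */
static int wireup_my_daemon(void)
{
    int rc;

    if (NULL == orte_proc_info.my_daemon_uri) {
        /* these modules absolutely MUST have this information */
        ORTE_ERROR_LOG(ORTE_ERR_FATAL);
        return ORTE_ERR_FATAL;
    }
    /* tell the RML how to reach the daemon - does not open a connection */
    if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_proc_info.my_daemon_uri))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    /* extract the daemon's name so the routing table can be updated */
    if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_proc_info.my_daemon_uri,
                                                       ORTE_PROC_MY_DAEMON, NULL))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    return ORTE_SUCCESS;
}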
@ -298,7 +298,7 @@ static int snapc_full_global_start_listener(void)
    int exit_status = ORTE_SUCCESS;
    int rc;

    if (snapc_recv_issued && orte_process_info.hnp) {
    if (snapc_recv_issued && orte_proc_info.hnp) {
        return ORTE_SUCCESS;
    }

@ -329,7 +329,7 @@ static int snapc_full_global_stop_listener(void)
    int exit_status = ORTE_SUCCESS;
    int rc;

    if (!snapc_recv_issued && orte_process_info.hnp) {
    if (!snapc_recv_issued && orte_proc_info.hnp) {
        return ORTE_SUCCESS;
    }

@ -354,7 +354,7 @@ static int snapc_full_global_start_cmdline_listener(void)
    int exit_status = ORTE_SUCCESS;
    int rc;

    if (snapc_cmdline_recv_issued && orte_process_info.hnp) {
    if (snapc_cmdline_recv_issued && orte_proc_info.hnp) {
        return ORTE_SUCCESS;
    }

@ -385,7 +385,7 @@ static int snapc_full_global_stop_cmdline_listener(void)
    int exit_status = ORTE_SUCCESS;
    int rc;

    if (!snapc_cmdline_recv_issued && orte_process_info.hnp) {
    if (!snapc_cmdline_recv_issued && orte_proc_info.hnp) {
        return ORTE_SUCCESS;
    }
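The snapc_full hunks touch only the guard at the top of each listener: on the HNP, starting an already-started listener, or stopping one that was never started, is treated as a no-op success. A minimal self-contained sketch of that idempotency pattern, with a hypothetical flag standing in for snapc_recv_issued and 0 standing in for ORTE_SUCCESS:

#include <stdbool.h>

static bool listener_running = false;   /* stands in for snapc_recv_issued */

static int start_listener(bool is_hnp)
{
    if (listener_running && is_hnp) {
        return 0;                 /* already posted - nothing to do */
    }
    /* ... post the persistent RML receive here ... */
    listener_running = true;
    return 0;
}

static int stop_listener(bool is_hnp)
{
    if (!listener_running && is_hnp) {
        return 0;                 /* never started - nothing to do */
    }
    /* ... cancel the receive here ... */
    listener_running = false;
    return 0;
}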
@ -194,7 +194,7 @@ void orte_daemon_cmd_processor(int fd, short event, void *data)
    orte_daemon_cmd_flag_t command;

    /* check to see if we are in a progress recursion */
    if (orte_process_info.daemon && 1 < (ret = opal_progress_recursion_depth())) {
    if (orte_proc_info.daemon && 1 < (ret = opal_progress_recursion_depth())) {
        /* if we are in a recursion, we want to repost the message event
         * so the progress engine can work its way back up to the top
         * of the stack. Given that this could happen multiple times,
@ -234,7 +234,7 @@ void orte_daemon_cmd_processor(int fd, short event, void *data)
    wait_time = 1;
    num_recursions = 0;

    if (orte_timing && orte_process_info.hnp) {
    if (orte_timing && orte_proc_info.hnp) {
        /* if we are doing timing, and we are the HNP, then the message doesn't come
         * through the RML recv, so we have to pickup the recv time here
         */
@ -526,7 +526,7 @@ static int process_commands(orte_process_name_t* sender,
                goto CLEANUP;
            }
            /* initialize the routes to my peers - this will update the number
             * of daemons in the system (i.e., orte_process_info.num_procs) as
             * of daemons in the system (i.e., orte_proc_info.num_procs) as
             * this might have changed
             */
            if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, relay_msg))) {
@ -605,7 +605,7 @@ static int process_commands(orte_process_name_t* sender,
            /* if we are the HNP, kill our local procs and
             * flag we are exited - but don't yet exit
             */
            if (orte_process_info.hnp) {
            if (orte_proc_info.hnp) {
                orte_job_t *daemons;
                orte_proc_t **procs;
                /* if we are the HNP, ensure our local procs are terminated */
@ -663,7 +663,7 @@ static int process_commands(orte_process_name_t* sender,
            /* if we are the HNP, kill our local procs and
             * flag we are exited - but don't yet exit
             */
            if (orte_process_info.hnp) {
            if (orte_proc_info.hnp) {
                orte_job_t *daemons;
                orte_proc_t **procs;
                /* if we are the HNP, ensure our local procs are terminated */
@ -709,7 +709,7 @@ static int process_commands(orte_process_name_t* sender,
            answer = OBJ_NEW(opal_buffer_t);
            job = ORTE_JOBID_INVALID;
            /* can only process this if we are the HNP */
            if (orte_process_info.hnp) {
            if (orte_proc_info.hnp) {
                /* unpack the job data */
                n = 1;
                if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &jdata, &n, ORTE_JOB))) {
@ -778,7 +778,7 @@ static int process_commands(orte_process_name_t* sender,
            /* if we are not the HNP, we can do nothing - report
             * back 0 procs so the tool won't hang
             */
            if (!orte_process_info.hnp) {
            if (!orte_proc_info.hnp) {
                orte_std_cntr_t zero=0;

                answer = OBJ_NEW(opal_buffer_t);
@ -861,7 +861,7 @@ static int process_commands(orte_process_name_t* sender,
            /* if we are not the HNP, we can do nothing - report
             * back 0 nodes so the tool won't hang
             */
            if (!orte_process_info.hnp) {
            if (!orte_proc_info.hnp) {
                orte_std_cntr_t zero=0;

                answer = OBJ_NEW(opal_buffer_t);
@ -942,7 +942,7 @@ static int process_commands(orte_process_name_t* sender,
            /* if we are not the HNP, we can do nothing - report
             * back 0 procs so the tool won't hang
             */
            if (!orte_process_info.hnp) {
            if (!orte_proc_info.hnp) {
                orte_std_cntr_t zero=0;

                answer = OBJ_NEW(opal_buffer_t);
@ -1077,7 +1077,7 @@ SEND_ANSWER:
             * the requestor. We need to convert that to our own job family
             */
            proc.jobid = ORTE_CONSTRUCT_LOCAL_JOBID(ORTE_PROC_MY_NAME->jobid, proc.jobid);
            if (orte_process_info.hnp) {
            if (orte_proc_info.hnp) {
                return_addr = sender;
                /* if the request is for a wildcard vpid, then it goes to every
                 * daemon. For scalability, we should probably xcast this some
@ -1086,7 +1086,7 @@ SEND_ANSWER:
                if (ORTE_VPID_WILDCARD == proc.vpid) {
                    /* loop across all daemons */
                    proc2.jobid = ORTE_PROC_MY_NAME->jobid;
                    for (proc2.vpid=1; proc2.vpid < orte_process_info.num_procs; proc2.vpid++) {
                    for (proc2.vpid=1; proc2.vpid < orte_proc_info.num_procs; proc2.vpid++) {
                        /* setup the cmd */
                        relay_msg = OBJ_NEW(opal_buffer_t);
                        command = ORTE_DAEMON_TOP_CMD;
@ -1200,7 +1200,7 @@ SEND_ANSWER:
            /* send the answer back to requester - callback
             * function will release buffer
             */
            if (orte_process_info.hnp) {
            if (orte_proc_info.hnp) {
                /* if I am the HNP, I need to also provide the number of
                 * replies the caller should recv and the sample time
                 */
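Finally, in the daemon command processor the same rename reaches the TOP-command relay in process_commands(): when the request targets ORTE_VPID_WILDCARD, the HNP sends one relay per daemon, starting at vpid 1 because vpid 0 is the HNP itself. A hypothetical condensation of that loop (not the patch's code; it assumes the surrounding ORTE headers, and the payload packing and the actual send are elided):

static void relay_top_cmd_to_all_daemons(void)
{
    orte_process_name_t proc2;
    opal_buffer_t *relay_msg;
    orte_daemon_cmd_flag_t command = ORTE_DAEMON_TOP_CMD;

    proc2.jobid = ORTE_PROC_MY_NAME->jobid;
    /* vpid 0 is the HNP itself, so the relays start at vpid 1 */
    for (proc2.vpid = 1; proc2.vpid < orte_proc_info.num_procs; proc2.vpid++) {
        relay_msg = OBJ_NEW(opal_buffer_t);
        /* pack the command flag first, as the receiving daemon expects;
         * ORTE_DAEMON_CMD is assumed to be the dss type for the flag */
        opal_dss.pack(relay_msg, &command, 1, ORTE_DAEMON_CMD);
        /* ... pack the rest of the request, then send relay_msg to proc2 ... */
    }
}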
Some files were not shown because too many files have changed in this diff.