- Second patch, as discussed in Louisville.
Replace short macros in orte/util/name_fns.h with the actual function call. - Compiles on linux/x86-64. This commit was SVN r20740.
Этот коммит содержится в:
родитель
781caee0b6
Коммит
2a70618a77
@ -408,10 +408,10 @@ static int mca_bml_r2_add_procs( size_t nprocs,
|
||||
orte_show_help("help-mca-bml-r2.txt",
|
||||
"unreachable proc",
|
||||
true,
|
||||
ORTE_NAME_PRINT(&(ompi_proc_local_proc->proc_name)),
|
||||
orte_util_print_name_args(&(ompi_proc_local_proc->proc_name)),
|
||||
(ompi_proc_local_proc->proc_hostname ?
|
||||
ompi_proc_local_proc->proc_hostname : "unknown!"),
|
||||
ORTE_NAME_PRINT(&(unreach_proc->proc_name)),
|
||||
orte_util_print_name_args(&(unreach_proc->proc_name)),
|
||||
(unreach_proc->proc_hostname ?
|
||||
unreach_proc->proc_hostname : "unknown!"),
|
||||
btl_names);
|
||||
|
@ -63,7 +63,7 @@ void mca_btl_base_error_no_nics(const char* transport,
|
||||
char *procid;
|
||||
if (mca_btl_base_warn_component_unused) {
|
||||
/* print out no-nic warning if user told us to */
|
||||
asprintf(&procid, "%s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
asprintf(&procid, "%s", orte_util_print_name_args(ORTE_PROC_MY_NAME));
|
||||
|
||||
orte_show_help("help-mpi-btl-base.txt", "btl:no-nics",
|
||||
true, procid, transport, orte_proc_info.nodename,
|
||||
|
@ -39,7 +39,7 @@ OMPI_DECLSPEC extern int mca_btl_base_out(const char*, ...);
|
||||
do { \
|
||||
mca_btl_base_out("[%s]%s[%s:%d:%s] ", \
|
||||
orte_proc_info.nodename, \
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, __func__); \
|
||||
mca_btl_base_out args; \
|
||||
mca_btl_base_out("\n"); \
|
||||
@ -50,7 +50,7 @@ do { \
|
||||
do { \
|
||||
mca_btl_base_err("[%s]%s[%s:%d:%s] ", \
|
||||
orte_proc_info.nodename, \
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, __func__); \
|
||||
mca_btl_base_err args; \
|
||||
mca_btl_base_err("\n"); \
|
||||
@ -59,7 +59,7 @@ do { \
|
||||
#define BTL_PEER_ERROR(proc, args) \
|
||||
do { \
|
||||
mca_btl_base_err("%s[%s:%d:%s] from %s ", \
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, __func__, \
|
||||
orte_proc_info.nodename); \
|
||||
if(proc && proc->proc_hostname) { \
|
||||
@ -76,7 +76,7 @@ do { \
|
||||
if(mca_btl_base_verbose > 0) { \
|
||||
mca_btl_base_err("[%s]%s[%s:%d:%s] ", \
|
||||
orte_proc_info.nodename, \
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__, __func__); \
|
||||
mca_btl_base_err args; \
|
||||
mca_btl_base_err("\n"); \
|
||||
|
@ -434,7 +434,7 @@ static int mca_btl_gm_discover( void )
|
||||
"%s gm_port %08lX, "
|
||||
"board %" PRIu32 ", global %" PRIu32 " "
|
||||
"node %" PRIu32 "port %" PRIu32 "\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(unsigned long) port, board_no, global_id, node_id, port_no);
|
||||
}
|
||||
|
||||
|
@ -130,14 +130,14 @@ mca_btl_gm_proc_t* mca_btl_gm_proc_create(ompi_proc_t* ompi_proc)
|
||||
&size);
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
opal_output(0, "[%s:%d] ompi_modex_recv failed for peer %s",
|
||||
__FILE__,__LINE__,ORTE_NAME_PRINT(&ompi_proc->proc_name));
|
||||
__FILE__,__LINE__,orte_util_print_name_args(&ompi_proc->proc_name));
|
||||
OBJ_RELEASE(gm_proc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if((size % sizeof(mca_btl_gm_addr_t)) != 0) {
|
||||
opal_output(0, "[%s:%d] invalid gm address for peer %s",
|
||||
__FILE__,__LINE__,ORTE_NAME_PRINT(&ompi_proc->proc_name));
|
||||
__FILE__,__LINE__,orte_util_print_name_args(&ompi_proc->proc_name));
|
||||
OBJ_RELEASE(gm_proc);
|
||||
return NULL;
|
||||
}
|
||||
@ -192,7 +192,7 @@ int mca_btl_gm_proc_insert(
|
||||
if(mca_btl_gm_component.gm_debug > 0) {
|
||||
opal_output(0, "%s mapped global id %" PRIu32
|
||||
" to node id %" PRIu32 "\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
gm_endpoint->endpoint_addr.global_id,
|
||||
gm_endpoint->endpoint_addr.node_id);
|
||||
}
|
||||
|
@ -122,7 +122,7 @@ mca_btl_mx_proc_t* mca_btl_mx_proc_create(ompi_proc_t* ompi_proc)
|
||||
ompi_proc, (void*)&mx_peers, &size );
|
||||
if( OMPI_SUCCESS != rc ) {
|
||||
opal_output( 0, "mca_pml_base_modex_recv failed for peer %s",
|
||||
ORTE_NAME_PRINT(&ompi_proc->proc_name) );
|
||||
orte_util_print_name_args(&ompi_proc->proc_name) );
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -131,7 +131,7 @@ mca_btl_mx_proc_t* mca_btl_mx_proc_create(ompi_proc_t* ompi_proc)
|
||||
}
|
||||
if( (size % sizeof(mca_btl_mx_addr_t)) != 0 ) {
|
||||
opal_output( 0, "invalid mx address for peer %s",
|
||||
ORTE_NAME_PRINT(&ompi_proc->proc_name) );
|
||||
orte_util_print_name_args(&ompi_proc->proc_name) );
|
||||
return NULL;
|
||||
}
|
||||
/* Let's see if we have a way to connect to the remote proc using MX.
|
||||
|
@ -129,14 +129,14 @@ mca_btl_ud_proc_t* mca_btl_ud_proc_create(ompi_proc_t* ompi_proc)
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
opal_output(0,
|
||||
"[%s:%d] ompi_modex_recv failed for peer %s",
|
||||
__FILE__,__LINE__,ORTE_NAME_PRINT(&ompi_proc->proc_name));
|
||||
__FILE__,__LINE__,orte_util_print_name_args(&ompi_proc->proc_name));
|
||||
OBJ_RELEASE(module_proc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if((size % sizeof(mca_btl_ud_addr_t)) != 0) {
|
||||
opal_output(0, "[%s:%d] invalid module address for peer %s",
|
||||
__FILE__,__LINE__,ORTE_NAME_PRINT(&ompi_proc->proc_name));
|
||||
__FILE__,__LINE__,orte_util_print_name_args(&ompi_proc->proc_name));
|
||||
OBJ_RELEASE(module_proc);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -159,7 +159,7 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_create(ompi_proc_t* ompi_proc)
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
BTL_ERROR(("[%s:%d] ompi_modex_recv failed for peer %s",
|
||||
__FILE__, __LINE__,
|
||||
ORTE_NAME_PRINT(&ompi_proc->proc_name)));
|
||||
orte_util_print_name_args(&ompi_proc->proc_name)));
|
||||
OBJ_RELEASE(module_proc);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -116,7 +116,7 @@ int mca_btl_pcie_proc_create(ompi_proc_t* ompi_proc,
|
||||
&size);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
opal_output(mca_btl_base_output, "[%s:%d] ompi_modex_recv failed for peer %s",
|
||||
__FILE__, __LINE__, ORTE_NAME_PRINT(&ompi_proc->proc_name));
|
||||
__FILE__, __LINE__, orte_util_print_name_args(&ompi_proc->proc_name));
|
||||
OBJ_RELEASE(pcie_proc);
|
||||
*ret_proc = NULL;
|
||||
return OMPI_ERROR;
|
||||
|
@ -483,7 +483,7 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en
|
||||
/* compare this to the expected values */
|
||||
if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &btl_proc->proc_name, &guid)) {
|
||||
BTL_ERROR(("received unexpected process identifier %s",
|
||||
ORTE_NAME_PRINT(&guid)));
|
||||
orte_util_print_name_args(&guid)));
|
||||
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||
return OMPI_ERR_UNREACH;
|
||||
}
|
||||
|
@ -133,7 +133,7 @@ mca_btl_udapl_proc_t* mca_btl_udapl_proc_create(ompi_proc_t* ompi_proc)
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL,
|
||||
("ompi_modex_recv failed for peer %s",
|
||||
ORTE_NAME_PRINT(&ompi_proc->proc_name)));
|
||||
orte_util_print_name_args(&ompi_proc->proc_name)));
|
||||
OBJ_RELEASE(udapl_proc);
|
||||
return NULL;
|
||||
}
|
||||
@ -141,7 +141,7 @@ mca_btl_udapl_proc_t* mca_btl_udapl_proc_create(ompi_proc_t* ompi_proc)
|
||||
if((size % sizeof(mca_btl_udapl_addr_t)) != 0) {
|
||||
BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL,
|
||||
("invalid udapl address for peer %s",
|
||||
ORTE_NAME_PRINT(&ompi_proc->proc_name)));
|
||||
orte_util_print_name_args(&ompi_proc->proc_name)));
|
||||
OBJ_RELEASE(udapl_proc);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -1477,7 +1477,7 @@ ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_del_procs(
|
||||
if(NULL == item) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: del_procs: Unable to find peer %s\n",
|
||||
ORTE_NAME_PRINT(&(procs[i]->proc_name)));
|
||||
orte_util_print_name_args(&(procs[i]->proc_name)));
|
||||
exit_status = OMPI_ERROR;
|
||||
goto DONE;
|
||||
}
|
||||
@ -3021,7 +3021,7 @@ ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_ft_event(
|
||||
if( OMPI_SUCCESS != (ret = ft_event_coordinate_peers()) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s ft_event: Checkpoint Coordination Failed %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
ret);
|
||||
exit_status = ret;
|
||||
goto DONE;
|
||||
@ -3366,7 +3366,7 @@ static int traffic_message_find_mark_persistent(ompi_crcp_bkmrk_pml_traffic_mess
|
||||
else if( loc_breq->req_sequence == breq->req_sequence ) {
|
||||
OPAL_OUTPUT_VERBOSE((25, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"%s %8s Request [%d] (%s) %d : %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(set_is_active ? "Start" : (NULL != c_ref ? "Drain" : "Complete")),
|
||||
(int)msg_ref->msg_id,
|
||||
(content_ref->active ? "T" : "F"),
|
||||
@ -3467,8 +3467,8 @@ static int traffic_message_create_drain_message(bool post_drain,
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
" --> Create Drain Msg: %s %4d = min(%4d / %4d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
(post_drain ? "Posting" : "Not Posting"),
|
||||
m_total, (*posted_msg_ref)->active, max_post ));
|
||||
|
||||
@ -3495,8 +3495,8 @@ static int traffic_message_create_drain_message(bool post_drain,
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
" \t--> Find Content: %s (%4d of %4d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
(post_drain ? "Posting" : "Not Posting"),
|
||||
m_iter, m_total));
|
||||
|
||||
@ -3573,8 +3573,8 @@ static int traffic_message_create_drain_message(bool post_drain,
|
||||
OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
"Added %d messages to the drained list (size = %d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
(*num_posted),
|
||||
(int)opal_list_get_size(&(peer_ref->drained_list)) ));
|
||||
|
||||
@ -4212,7 +4212,7 @@ static int ft_event_coordinate_peers(void)
|
||||
if( stall_for_completion ) {
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s **** STALLING %s in PID %d ***",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(current_msg_type == COORD_MSG_TYPE_B_SEND ? "Send" : "Recv"),
|
||||
getpid() ));
|
||||
step_to_return_to = 1;
|
||||
@ -4239,7 +4239,7 @@ static int ft_event_coordinate_peers(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s Coordination Finished...\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/*
|
||||
* Now that all our peer channels are marked as drained
|
||||
@ -4428,10 +4428,10 @@ static int ft_event_check_bookmarks(void)
|
||||
"---------------------------------------------"));
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"Process %s Match Table",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"%s %5s | %7s | %7s | %7s | %7s |",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
"Vpid", "T_Send", "M_Recv", "M_Send", "T_Recv"));
|
||||
|
||||
for(item = opal_list_get_first(&ompi_crcp_bkmrk_pml_peer_refs);
|
||||
@ -4449,7 +4449,7 @@ static int ft_event_check_bookmarks(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"%s %5d | %7d | %7d | %7d | %7d |",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
peer_ref->proc_name.vpid,
|
||||
t_send, m_recv, m_send, t_recv));
|
||||
}
|
||||
@ -4491,8 +4491,8 @@ static int ft_event_check_bookmarks(void)
|
||||
"crcp:bkmrk: %s --> %s "
|
||||
"Total Sent (%4d) = Matched Recv. (%4d) => Diff (%4d). "
|
||||
" WARNING: Peer received more than was sent. :(\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
p_n_to_p_m,
|
||||
p_n_from_p_m,
|
||||
(p_n_to_p_m - p_n_from_p_m)
|
||||
@ -4505,8 +4505,8 @@ static int ft_event_check_bookmarks(void)
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s --> %s "
|
||||
"Total Sent (%4d) = Matched Recv. (%4d). Peer needs %4d.\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
p_n_to_p_m,
|
||||
p_n_from_p_m,
|
||||
(p_n_to_p_m - p_n_from_p_m)
|
||||
@ -4519,7 +4519,7 @@ static int ft_event_check_bookmarks(void)
|
||||
if( OMPI_SUCCESS != (ret = send_msg_details(peer_ref, p_n_to_p_m, p_n_from_p_m) ) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: check_bookmarks: Unable to send message details to peer %s: Return %d\n",
|
||||
ORTE_NAME_PRINT(&peer_ref->proc_name),
|
||||
orte_util_print_name_args(&peer_ref->proc_name),
|
||||
ret);
|
||||
return ret;
|
||||
}
|
||||
@ -4538,8 +4538,8 @@ static int ft_event_check_bookmarks(void)
|
||||
"crcp:bkmrk: %s --> %s "
|
||||
"Matched Sent (%4d) = Total Recv. (%4d) => Diff (%4d). "
|
||||
" WARNING: I received more than the peer sent. :(\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
p_n_to_p_m,
|
||||
p_n_from_p_m,
|
||||
(p_n_to_p_m - p_n_from_p_m)
|
||||
@ -4552,8 +4552,8 @@ static int ft_event_check_bookmarks(void)
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
"Matched Sent (%4d) = Total Recv. (%4d). I need %4d.\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
p_n_to_p_m,
|
||||
p_n_from_p_m,
|
||||
(p_n_to_p_m - p_n_from_p_m)
|
||||
@ -4565,7 +4565,7 @@ static int ft_event_check_bookmarks(void)
|
||||
if( OMPI_SUCCESS != (ret = recv_msg_details(peer_ref, p_n_to_p_m, p_n_from_p_m) ) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: check_bookmarks: Unable to recv message details from peer %s: Return %d\n",
|
||||
ORTE_NAME_PRINT(&peer_ref->proc_name),
|
||||
orte_util_print_name_args(&peer_ref->proc_name),
|
||||
ret);
|
||||
return ret;
|
||||
}
|
||||
@ -4586,8 +4586,8 @@ static int ft_event_check_bookmarks(void)
|
||||
"crcp:bkmrk: %s --> %s "
|
||||
"Matched Sent (%4d) = Total Recv. (%4d) => Diff (%4d). "
|
||||
" WARNING: I received more than the peer sent. :(\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
p_n_to_p_m,
|
||||
p_n_from_p_m,
|
||||
(p_n_to_p_m - p_n_from_p_m)
|
||||
@ -4600,8 +4600,8 @@ static int ft_event_check_bookmarks(void)
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
"Matched Sent (%4d) = Total Recv. (%4d). I need %4d.\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
p_n_to_p_m,
|
||||
p_n_from_p_m,
|
||||
(p_n_to_p_m - p_n_from_p_m)
|
||||
@ -4613,7 +4613,7 @@ static int ft_event_check_bookmarks(void)
|
||||
if( OMPI_SUCCESS != (ret = recv_msg_details(peer_ref, p_n_to_p_m, p_n_from_p_m) ) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: check_bookmarks: Unable to recv message details from peer %s: Return %d\n",
|
||||
ORTE_NAME_PRINT(&peer_ref->proc_name),
|
||||
orte_util_print_name_args(&peer_ref->proc_name),
|
||||
ret);
|
||||
return ret;
|
||||
}
|
||||
@ -4632,8 +4632,8 @@ static int ft_event_check_bookmarks(void)
|
||||
"crcp:bkmrk: %s --> %s "
|
||||
"Total Sent (%4d) = Matched Recv. (%4d) => Diff (%4d). "
|
||||
" WARNING: Peer received more than was sent. :(\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
p_n_to_p_m,
|
||||
p_n_from_p_m,
|
||||
(p_n_to_p_m - p_n_from_p_m)
|
||||
@ -4646,8 +4646,8 @@ static int ft_event_check_bookmarks(void)
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s --> %s "
|
||||
"Total Sent (%4d) = Matched Recv. (%4d). Peer needs %4d.\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
p_n_to_p_m,
|
||||
p_n_from_p_m,
|
||||
(p_n_to_p_m - p_n_from_p_m)
|
||||
@ -4660,7 +4660,7 @@ static int ft_event_check_bookmarks(void)
|
||||
if( OMPI_SUCCESS != (ret = send_msg_details(peer_ref, p_n_to_p_m, p_n_from_p_m) ) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: check_bookmarks: Unable to send message details to peer %s: Return %d\n",
|
||||
ORTE_NAME_PRINT(&peer_ref->proc_name),
|
||||
orte_util_print_name_args(&peer_ref->proc_name),
|
||||
ret);
|
||||
return ret;
|
||||
}
|
||||
@ -4685,7 +4685,7 @@ static int ft_event_post_drain_acks(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s Wait on %d Drain ACK Messages.\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(int)req_size));
|
||||
|
||||
/*
|
||||
@ -4705,8 +4705,8 @@ static int ft_event_post_drain_acks(void)
|
||||
NULL) ) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s: Failed to post a RML receive to the peer\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(drain_msg_ack->peer)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(drain_msg_ack->peer)));
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@ -4748,8 +4748,8 @@ static void drain_message_ack_cbfunc(int status,
|
||||
drain_msg_ack->complete = true;
|
||||
OPAL_OUTPUT_VERBOSE((5, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s --> %s Received ACK of FLUSH from peer\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(sender) ));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(sender) ));
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -4757,8 +4757,8 @@ static void drain_message_ack_cbfunc(int status,
|
||||
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s --> %s ERROR: Unable to match ACK to peer\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(sender) );
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(sender) );
|
||||
|
||||
cleanup:
|
||||
return;
|
||||
@ -4891,8 +4891,8 @@ static int ft_event_post_drained(void)
|
||||
if( peer_total > 0 || stall_for_completion ) {
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s Will be draining %4d messages from this peer. Total %4d %s\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(cur_peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(cur_peer_ref->proc_name)),
|
||||
peer_total,
|
||||
quiesce_request_count,
|
||||
(stall_for_completion ? "(And Stalling)" : "") ));
|
||||
@ -4922,8 +4922,8 @@ static int ft_event_post_drain_message(ompi_crcp_bkmrk_pml_drain_message_ref_t
|
||||
if( content_ref->already_posted ) {
|
||||
OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s Found a message that we do not need to post.\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(drain_msg_ref->proc_name)) ));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(drain_msg_ref->proc_name)) ));
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -4936,8 +4936,8 @@ static int ft_event_post_drain_message(ompi_crcp_bkmrk_pml_drain_message_ref_t
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((20, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s Posting a message to be drained from rank %d.\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(drain_msg_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(drain_msg_ref->proc_name)),
|
||||
drain_msg_ref->rank));
|
||||
if( OMPI_SUCCESS != (ret = wrapped_pml_module->pml_irecv(content_ref->buffer,
|
||||
(drain_msg_ref->count * drain_msg_ref->ddt_size),
|
||||
@ -4948,8 +4948,8 @@ static int ft_event_post_drain_message(ompi_crcp_bkmrk_pml_drain_message_ref_t
|
||||
&(content_ref->request) ) ) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s Failed to post the Draining PML iRecv\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(drain_msg_ref->proc_name)) );
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(drain_msg_ref->proc_name)) );
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -4967,7 +4967,7 @@ static int ft_event_wait_quiesce(void)
|
||||
if( OMPI_SUCCESS != (ret = wait_quiesce_drained() ) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: wait_quiesce: %s Failed to quiesce drained messages\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME) );
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
}
|
||||
@ -4978,7 +4978,7 @@ static int ft_event_wait_quiesce(void)
|
||||
if( OMPI_SUCCESS != (ret = wait_quiesce_drain_ack() ) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: wait_quiesce: %s Failed to recv all drain ACKs\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME) );
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
}
|
||||
@ -5000,7 +5000,7 @@ static int wait_quiesce_drained(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s Waiting on %d messages to drain\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(int)quiesce_request_count));
|
||||
|
||||
/*
|
||||
@ -5044,8 +5044,8 @@ static int wait_quiesce_drained(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s --> %s Send ACKs to Peer\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(cur_peer_ref->proc_name)) ));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(cur_peer_ref->proc_name)) ));
|
||||
|
||||
/* Send All Clear to Peer */
|
||||
if (NULL == (buffer = OBJ_NEW(opal_buffer_t))) {
|
||||
@ -5145,7 +5145,7 @@ static int coord_request_wait_all( size_t count,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s Request Wait: Done with idx %d of %d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(int)i, (int)count));
|
||||
}
|
||||
|
||||
@ -5184,7 +5184,7 @@ static int wait_quiesce_drain_ack(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s Waiting on %d Drain ACK messages\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
num_outstanding));
|
||||
|
||||
while(0 < num_outstanding) {
|
||||
@ -5240,8 +5240,8 @@ static int send_bookmarks(int peer_idx)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s --> %s Sending bookmark (S[%6d] R[%6d])\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer_name),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&peer_name),
|
||||
peer_ref->total_msgs_sent,
|
||||
peer_ref->total_msgs_recvd));
|
||||
|
||||
@ -5261,7 +5261,7 @@ static int send_bookmarks(int peer_idx)
|
||||
if ( 0 > ( ret = orte_rml.send_buffer(&peer_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: send_bookmarks: Failed to send bookmark to peer %s: Return %d\n",
|
||||
ORTE_NAME_PRINT(&peer_name),
|
||||
orte_util_print_name_args(&peer_name),
|
||||
ret);
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
@ -5297,7 +5297,7 @@ static int recv_bookmarks(int peer_idx)
|
||||
NULL) ) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: recv_bookmarks: Failed to post receive bookmark from peer %s: Return %d\n",
|
||||
ORTE_NAME_PRINT(&peer_name),
|
||||
orte_util_print_name_args(&peer_name),
|
||||
ret);
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
@ -5348,8 +5348,8 @@ static void recv_bookmarks_cbfunc(int status,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((15, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s Received bookmark (S[%6d] R[%6d]) vs. (S[%6d] R[%6d])\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(sender),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(sender),
|
||||
peer_ref->matched_msgs_sent,
|
||||
peer_ref->matched_msgs_recvd,
|
||||
peer_ref->total_msgs_sent,
|
||||
@ -5413,8 +5413,8 @@ static int send_msg_details(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
|
||||
if(OMPI_SUCCESS != (ret = do_send_msg_detail(peer_ref, msg_ref, &num_matches, &p_total_found, &finished)) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: send_msg_details: %s --> %s Failed to send message details to peer. Return %d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
ret);
|
||||
}
|
||||
|
||||
@ -5475,8 +5475,8 @@ static int send_msg_details(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
|
||||
opal_list_append(&drained_msg_ack_list, &(d_msg_ack->super));
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-> %s Message Inflight! Will wait on ACK from this peer.\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name))));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name))));
|
||||
|
||||
END_TIMER(CRCP_TIMER_CKPT_CHECK_PEER_S);
|
||||
DISPLAY_INDV_TIMER(CRCP_TIMER_CKPT_CHECK_PEER_S, peer_ref->proc_name.vpid, total_details_sent);
|
||||
@ -5553,7 +5553,7 @@ static int do_send_msg_detail(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
|
||||
OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: do_send_msg_detail: Unable to send message details to peer %s: Return %d\n",
|
||||
ORTE_NAME_PRINT(&peer_ref->proc_name),
|
||||
orte_util_print_name_args(&peer_ref->proc_name),
|
||||
ret);
|
||||
|
||||
exit_status = OMPI_ERROR;
|
||||
@ -5580,8 +5580,8 @@ static int do_send_msg_detail(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
|
||||
OMPI_CRCP_COORD_BOOKMARK_TAG, 0) ) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: do_send_msg_detail: %s --> %s Failed to receive ACK buffer from peer. Return %d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
ret);
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
@ -5667,8 +5667,8 @@ static int recv_msg_details(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: recv_msg_details: %s <-- %s "
|
||||
"Failed to receive message detail from peer. Return %d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
ret);
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
@ -5688,8 +5688,8 @@ static int recv_msg_details(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: recv_msg_details: %s <-- %s "
|
||||
"Failed to check message detail from peer. Return %d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
ret);
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
@ -5700,8 +5700,8 @@ static int recv_msg_details(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s Recv Detail: Stage --: [%3d / %3d] [%3d, %3d, %s]",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
need, found,
|
||||
num_resolved, total_details_recv,
|
||||
( need <= found ? "T" : "F") ));
|
||||
@ -5718,8 +5718,8 @@ static int recv_msg_details(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
|
||||
if(OMPI_SUCCESS != (ret = do_recv_msg_detail_resp(peer_ref, response, num_resolved, found))) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: recv_msg_details: %s <-- %s Failed to respond to peer. Return %d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
ret);
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
@ -5754,8 +5754,8 @@ static int do_recv_msg_detail(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
|
||||
if ( 0 > (ret = orte_rml.recv_buffer(&peer_ref->proc_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0) ) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: do_recv_msg_detail: %s <-- %s Failed to receive buffer from peer. Return %d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
ret);
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
@ -5814,8 +5814,8 @@ static int do_recv_msg_detail_check_drain(ompi_crcp_bkmrk_pml_peer_ref_t *peer_r
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
"Stage 0: Ck.Drain: [TR %3d/MS %3d] sent %4d, unres %4d, res %4d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
peer_ref->total_msgs_recvd,
|
||||
peer_ref->matched_msgs_sent,
|
||||
p_num_sent,
|
||||
@ -5837,8 +5837,8 @@ static int do_recv_msg_detail_check_drain(ompi_crcp_bkmrk_pml_peer_ref_t *peer_r
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: recv_msg_detail_check: %s -- %s "
|
||||
"Failed to determine if we have received this message. Return %d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
ret);
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
@ -5887,8 +5887,8 @@ static int do_recv_msg_detail_check_drain(ompi_crcp_bkmrk_pml_peer_ref_t *peer_r
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
"Stage 1: Ck.Drain: [TR %3d/MS %3d] sent %4d, unres %4d, res %4d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
peer_ref->total_msgs_recvd,
|
||||
peer_ref->matched_msgs_sent,
|
||||
p_num_sent,
|
||||
@ -5972,8 +5972,8 @@ static int do_recv_msg_detail_check_drain(ompi_crcp_bkmrk_pml_peer_ref_t *peer_r
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
"Stage 2: Ck.Drain: [TR %3d/MS %3d] sent %4d, unres %4d, res %4d, active %4d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
peer_ref->total_msgs_recvd,
|
||||
peer_ref->matched_msgs_sent,
|
||||
p_num_sent,
|
||||
@ -6021,8 +6021,8 @@ static int do_recv_msg_detail_check_drain(ompi_crcp_bkmrk_pml_peer_ref_t *peer_r
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
"Recv Check: Found a message that is 'active'! Prepare to STALL.\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)) ));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)) ));
|
||||
stall_for_completion = true;
|
||||
}
|
||||
else {
|
||||
@ -6030,8 +6030,8 @@ static int do_recv_msg_detail_check_drain(ompi_crcp_bkmrk_pml_peer_ref_t *peer_r
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
"Recv Check: Found a message that is 'active', but is not the current recv! "
|
||||
"No stall required [%3d, %3d, %3d, %3d].\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
(int)current_msg_id,
|
||||
(int)current_msg_type,
|
||||
(int)posted_recv_msg_ref->msg_id,
|
||||
@ -6089,8 +6089,8 @@ static int do_recv_msg_detail_check_drain(ompi_crcp_bkmrk_pml_peer_ref_t *peer_r
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
"Stage 3: Ck.Drain: [TR %3d/MS %3d] sent %4d, unres %4d, res %4d, active %4d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
peer_ref->total_msgs_recvd,
|
||||
peer_ref->matched_msgs_sent,
|
||||
p_num_sent,
|
||||
@ -6126,8 +6126,8 @@ static int do_recv_msg_detail_check_drain(ompi_crcp_bkmrk_pml_peer_ref_t *peer_r
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: %s <-- %s "
|
||||
"Stage 4: Ck.Drain: [TR %3d/MS %3d] sent %4d, unres %4d, res %4d, active %4d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
peer_ref->total_msgs_recvd,
|
||||
peer_ref->matched_msgs_sent,
|
||||
p_num_sent,
|
||||
@ -6163,7 +6163,7 @@ static int do_recv_msg_detail_resp(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
|
||||
if ( 0 > ( ret = orte_rml.send_buffer(&peer_ref->proc_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) {
|
||||
opal_output(mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: recv_msg_detail_resp: Unable to send message detail response to peer %s: Return %d\n",
|
||||
ORTE_NAME_PRINT(&peer_ref->proc_name),
|
||||
orte_util_print_name_args(&peer_ref->proc_name),
|
||||
ret);
|
||||
exit_status = OMPI_ERROR;
|
||||
goto cleanup;
|
||||
@ -6478,8 +6478,8 @@ static void traffic_message_dump_peer(ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref,
|
||||
|
||||
opal_output(0, "------------- %s ---------------------------------", msg);
|
||||
opal_output(0, "%s <-> %s Totals Sent [ %3d / %3d ] Recv [ %3d / %3d ]",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer_ref->proc_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer_ref->proc_name)),
|
||||
peer_ref->total_msgs_sent,
|
||||
peer_ref->matched_msgs_sent,
|
||||
peer_ref->total_msgs_recvd,
|
||||
|
@ -137,7 +137,7 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output,
|
||||
"%s dpm:orte:connect_accept with port %s %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
port_string, send_first ? "sending first" : "recv first"));
|
||||
|
||||
/* set default error return */
|
||||
@ -199,8 +199,8 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm:orte:connect_accept adding %s to proc list",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&proc_list[i]->proc_name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&proc_list[i]->proc_name)));
|
||||
ompi_proc_pack(proc_list, size, nbuf);
|
||||
}
|
||||
|
||||
@ -218,13 +218,13 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
if ( send_first ) {
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm:orte:connect_accept sending first to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&port)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&port)));
|
||||
rc = orte_rml.send_buffer(&port, nbuf, tag, 0);
|
||||
/* setup to recv */
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm:orte:connect_accept waiting for response",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
recv_completed = false;
|
||||
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, tag,
|
||||
ORTE_RML_NON_PERSISTENT, recv_cb, NULL);
|
||||
@ -232,13 +232,13 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
ORTE_PROGRESSED_WAIT(recv_completed, 0, 1);
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm:orte:connect_accept got data from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&carport)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&carport)));
|
||||
|
||||
} else {
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm:orte:connect_accept recving first",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
/* setup to recv */
|
||||
recv_completed = false;
|
||||
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, tag,
|
||||
@ -248,8 +248,8 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
/* now send our info */
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm:orte:connect_accept sending info to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&carport)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&carport)));
|
||||
rc = orte_rml.send_buffer(&carport, nbuf, tag, 0);
|
||||
}
|
||||
|
||||
@ -269,7 +269,7 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
/* bcast the buffer-length to all processes in the local comm */
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm:orte:connect_accept bcast buffer length",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
rc = comm->c_coll.coll_bcast (&rnamebuflen_int, 1, MPI_INT, root, comm,
|
||||
comm->c_coll.coll_bcast_module);
|
||||
if ( OMPI_SUCCESS != rc ) {
|
||||
@ -293,7 +293,7 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm:orte:connect_accept bcast proc list",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
rc = comm->c_coll.coll_bcast (rnamebuf, rnamebuflen_int, MPI_BYTE, root, comm,
|
||||
comm->c_coll.coll_bcast_module);
|
||||
if ( OMPI_SUCCESS != rc ) {
|
||||
@ -322,7 +322,7 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm:orte:connect_accept unpacked %d new procs",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), new_proc_len));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), new_proc_len));
|
||||
|
||||
/* If we added new procs, we need to do the modex and then call
|
||||
PML add_procs */
|
||||
@ -339,8 +339,8 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
opal_list_append(&all_procs, &name->item);
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm:orte:connect_accept send first adding %s to allgather list",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&name->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&name->name)));
|
||||
}
|
||||
for (i = 0 ; i < group->grp_proc_count ; ++i) {
|
||||
name = OBJ_NEW(orte_namelist_t);
|
||||
@ -348,8 +348,8 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
opal_list_append(&all_procs, &name->item);
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm:orte:connect_accept send first adding %s to allgather list",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&name->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&name->name)));
|
||||
}
|
||||
|
||||
} else {
|
||||
@ -359,8 +359,8 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
opal_list_append(&all_procs, &name->item);
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm:orte:connect_accept recv first adding %s to allgather list",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&name->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&name->name)));
|
||||
}
|
||||
for (i = 0 ; i < rsize ; ++i) {
|
||||
name = OBJ_NEW(orte_namelist_t);
|
||||
@ -368,8 +368,8 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
|
||||
opal_list_append(&all_procs, &name->item);
|
||||
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
|
||||
"%s dpm:orte:connect_accept recv first adding %s to allgather list",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&name->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&name->name)));
|
||||
}
|
||||
|
||||
}
|
||||
@ -917,7 +917,7 @@ static int dyn_init(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output,
|
||||
"%s dpm:orte:dyn_init with port %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
port_name));
|
||||
|
||||
rc = connect_accept (MPI_COMM_WORLD, root, port_name, send_first, &newcomm);
|
||||
|
@ -136,7 +136,7 @@ mca_mpool_base_module_t* mca_mpool_base_module_create(
|
||||
#endif
|
||||
} else {
|
||||
orte_show_help("help-mpool-base.txt", "leave pinned failed",
|
||||
true, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
true, orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_proc_info.nodename);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -172,13 +172,13 @@ void mca_mpool_base_tree_print(void)
|
||||
if (num_leaks <= ompi_debug_show_mpi_alloc_mem_leaks ||
|
||||
ompi_debug_show_mpi_alloc_mem_leaks < 0) {
|
||||
orte_show_help("help-mpool-base.txt", "all mem leaks",
|
||||
true, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
true, orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_proc_info.nodename,
|
||||
orte_proc_info.pid, leak_msg);
|
||||
} else {
|
||||
int i = num_leaks - ompi_debug_show_mpi_alloc_mem_leaks;
|
||||
orte_show_help("help-mpool-base.txt", "some mem leaks",
|
||||
true, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
true, orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_proc_info.nodename,
|
||||
orte_proc_info.pid, leak_msg, i,
|
||||
(i > 1) ? "s were" : " was",
|
||||
|
@ -439,7 +439,7 @@ void mca_mpool_rdma_finalize(struct mca_mpool_base_module_t *mpool)
|
||||
if(true == mca_mpool_rdma_component.print_stats) {
|
||||
opal_output(0, "%s rdma: stats "
|
||||
"(hit/miss/found/not found/evicted): %d/%d/%d/%d/%d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
mpool_rdma->stat_cache_hit, mpool_rdma->stat_cache_miss,
|
||||
mpool_rdma->stat_cache_found, mpool_rdma->stat_cache_notfound,
|
||||
mpool_rdma->stat_evicted);
|
||||
|
@ -378,14 +378,14 @@ mca_pml_base_pml_check_selected(const char *my_pml,
|
||||
(0 != strcmp(my_pml, remote_pml))) {
|
||||
if (procs[0]->proc_hostname) {
|
||||
opal_output(0, "%s selected pml %s, but peer %s on %s selected pml %s",
|
||||
ORTE_NAME_PRINT(&ompi_proc_local()->proc_name),
|
||||
my_pml, ORTE_NAME_PRINT(&procs[0]->proc_name),
|
||||
orte_util_print_name_args(&ompi_proc_local()->proc_name),
|
||||
my_pml, orte_util_print_name_args(&procs[0]->proc_name),
|
||||
procs[0]->proc_hostname,
|
||||
remote_pml);
|
||||
} else {
|
||||
opal_output(0, "%s selected pml %s, but peer %s selected pml %s",
|
||||
ORTE_NAME_PRINT(&ompi_proc_local()->proc_name),
|
||||
my_pml, ORTE_NAME_PRINT(&procs[0]->proc_name),
|
||||
orte_util_print_name_args(&ompi_proc_local()->proc_name),
|
||||
my_pml, orte_util_print_name_args(&procs[0]->proc_name),
|
||||
remote_pml);
|
||||
}
|
||||
free(remote_pml); /* cleanup before returning */
|
||||
|
@ -70,7 +70,7 @@ static void setup_server(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, ompi_pubsub_base_output,
|
||||
"%s pubsub:orte: setting up server at URI %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(NULL == mca_pubsub_orte_component.server_uri) ? "NULL" : mca_pubsub_orte_component.server_uri));
|
||||
|
||||
/* flag setup as completed so we only pass through here once */
|
||||
@ -113,8 +113,8 @@ static void setup_server(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, ompi_pubsub_base_output,
|
||||
"%s pubsub:orte: server %s setup",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&mca_pubsub_orte_component.server)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&mca_pubsub_orte_component.server)));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -175,7 +175,7 @@ static int publish ( char *service_name, ompi_info_t *info, char *port_name )
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, ompi_pubsub_base_output,
|
||||
"%s pubsub:orte: publishing service %s scope %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
service_name, global_scope ? "Global" : "Local"));
|
||||
|
||||
/* construct the buffer */
|
||||
@ -323,7 +323,7 @@ static char* lookup ( char *service_name, ompi_info_t *info )
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, ompi_pubsub_base_output,
|
||||
"%s pubsub:orte: lookup service %s scope %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
service_name, lookup[0]));
|
||||
|
||||
/* go find the value */
|
||||
@ -475,7 +475,7 @@ static int unpublish ( char *service_name, ompi_info_t *info )
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, ompi_pubsub_base_output,
|
||||
"%s pubsub:orte: unpublish service %s scope %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
service_name, global_scope ? "Global" : "Local"));
|
||||
|
||||
/* construct the buffer */
|
||||
|
@ -262,7 +262,7 @@ int main(int argc, char *argv[])
|
||||
opal_progress_set_event_flag(OPAL_EVLOOP_ONCE);
|
||||
|
||||
if (debug) {
|
||||
opal_output(0, "%s ompi-server: up and running!", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
opal_output(0, "%s ompi-server: up and running!", orte_util_print_name_args(ORTE_PROC_MY_NAME));
|
||||
}
|
||||
|
||||
/* wait to hear we are done */
|
||||
@ -282,7 +282,7 @@ static void shutdown_callback(int fd, short flags, void *arg)
|
||||
int ret;
|
||||
|
||||
if (debug) {
|
||||
opal_output(0, "%s ompi-server: finalizing", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
opal_output(0, "%s ompi-server: finalizing", orte_util_print_name_args(ORTE_PROC_MY_NAME));
|
||||
}
|
||||
|
||||
/* Finalize and clean up ourselves */
|
||||
|
@ -48,7 +48,7 @@ void orte_errmgr_base_log(int error_code, char *filename, int line)
|
||||
}
|
||||
|
||||
opal_output(0, "%s ORTE_ERROR_LOG: %s in file %s at line %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
ORTE_ERROR_NAME(error_code), filename, line);
|
||||
}
|
||||
|
||||
|
@ -55,16 +55,16 @@ void orte_errmgr_default_proc_aborted(orte_process_name_t *name, int exit_code)
|
||||
if (!opal_atomic_trylock(&orte_abort_inprogress_lock)) { /* returns 1 if already locked */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_output,
|
||||
"%s errmgr:default: abort in progress, ignoring proc %s aborted with status %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(name), exit_code));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(name), exit_code));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_output,
|
||||
"%s errmgr:default: proc %s aborting with status %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(name), exit_code));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(name), exit_code));
|
||||
|
||||
orte_job_term_ordered = true;
|
||||
|
||||
@ -121,15 +121,15 @@ void orte_errmgr_default_incomplete_start(orte_jobid_t job, int exit_code)
|
||||
if (!opal_atomic_trylock(&orte_abort_inprogress_lock)) { /* returns 1 if already locked */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_output,
|
||||
"%s errmgr:default: abort in progress, ignoring incomplete start on job %s with status %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(job), exit_code));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(job), exit_code));
|
||||
return;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_output,
|
||||
"%s errmgr:default: job %s reported incomplete start with status %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(job), exit_code));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(job), exit_code));
|
||||
|
||||
orte_job_term_ordered = true;
|
||||
|
||||
|
@ -172,15 +172,15 @@ static uint8_t proc_get_locality(orte_process_name_t *proc)
|
||||
if (nid->daemon == ORTE_PROC_MY_DAEMON->vpid) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:alps: proc %s on LOCAL NODE",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
return (OPAL_PROC_ON_NODE | OPAL_PROC_ON_CU | OPAL_PROC_ON_CLUSTER);
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:alps: proc %s is REMOTE",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
|
||||
return OPAL_PROC_NON_LOCAL;
|
||||
|
||||
@ -196,9 +196,9 @@ static orte_vpid_t proc_get_daemon(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:alps: proc %s is hosted by daemon %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
ORTE_VPID_PRINT(nid->daemon)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
orte_util_print_vpids(nid->daemon)));
|
||||
|
||||
return nid->daemon;
|
||||
}
|
||||
@ -214,8 +214,8 @@ static char* proc_get_hostname(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:alps: proc %s is on host %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
nid->name));
|
||||
|
||||
return nid->name;
|
||||
@ -232,8 +232,8 @@ static uint32_t proc_get_arch(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:alps: proc %s has arch %0x",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
nid->arch));
|
||||
|
||||
return nid->arch;
|
||||
@ -250,8 +250,8 @@ static int update_arch(orte_process_name_t *proc, uint32_t arch)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:alps: updating proc %s to arch %0x",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
arch));
|
||||
|
||||
nid->arch = arch;
|
||||
@ -270,8 +270,8 @@ static orte_local_rank_t proc_get_local_rank(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:alps: proc %s has local rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
(int)pmap->local_rank));
|
||||
|
||||
return pmap->local_rank;
|
||||
@ -288,8 +288,8 @@ static orte_node_rank_t proc_get_node_rank(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:alps: proc %s has node rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
(int)pmap->node_rank));
|
||||
|
||||
return pmap->node_rank;
|
||||
@ -355,7 +355,7 @@ static int alps_set_name(void)
|
||||
ORTE_PROC_MY_NAME->vpid = (orte_vpid_t) cnos_get_rank() + starting_vpid;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
|
||||
"ess:alps set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
"ess:alps set name to %s", orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
orte_proc_info.num_procs = (orte_std_cntr_t) cnos_get_size();
|
||||
|
||||
|
@ -122,7 +122,7 @@ int orte_ess_base_app_setup(void)
|
||||
/* setup my session directory */
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
|
||||
"%s setting up session dir with\n\ttmpdir: %s\n\thost %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(NULL == orte_proc_info.tmpdir_base) ? "UNDEF" : orte_proc_info.tmpdir_base,
|
||||
orte_proc_info.nodename));
|
||||
|
||||
@ -286,7 +286,7 @@ void orte_ess_base_app_abort(int status, bool report)
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_debug_output,
|
||||
"%s orte_ess_app_abort: dropping abort file %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), abort_file));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), abort_file));
|
||||
fd = open(abort_file, O_CREAT, 0600);
|
||||
if (0 < fd) close(fd);
|
||||
}
|
||||
|
@ -186,7 +186,7 @@ int orte_ess_base_orted_setup(void)
|
||||
/* setup my session directory */
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
|
||||
"%s setting up session dir with\n\ttmpdir: %s\n\thost %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(NULL == orte_proc_info.tmpdir_base) ? "UNDEF" : orte_proc_info.tmpdir_base,
|
||||
orte_proc_info.nodename));
|
||||
|
||||
|
@ -175,15 +175,15 @@ static bool proc_is_local(orte_process_name_t *proc)
|
||||
if (pmap[proc->vpid].node == (int32_t)ORTE_PROC_MY_DAEMON->vpid) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:bproc: proc %s is LOCAL",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
return true;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:bproc: proc %s is REMOTE",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
|
||||
return false;
|
||||
|
||||
@ -231,8 +231,8 @@ static char* proc_get_hostname(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:bproc: proc %s is on host %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
nids[node]->name));
|
||||
|
||||
return nids[node]->name;
|
||||
@ -256,8 +256,8 @@ static uint32_t proc_get_arch(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:bproc: proc %s has arch %0x",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
nids[node]->arch));
|
||||
|
||||
return nids[node]->arch;
|
||||
@ -274,8 +274,8 @@ static int update_arch(orte_process_name_t *proc, uint32_t arch)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:bproc: updating proc %s to arch %0x",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
arch));
|
||||
|
||||
nids[node]->arch = arch;
|
||||
@ -287,8 +287,8 @@ static uint8_t proc_get_local_rank(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:bproc: proc %s has local rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
(int)pmap[proc->vpid].local_rank));
|
||||
|
||||
return pmap[proc->vpid].local_rank;
|
||||
@ -299,8 +299,8 @@ static uint8_t proc_get_node_rank(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:bproc: proc %s has node rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
(int)pmap[proc->vpid].node_rank));
|
||||
|
||||
return pmap[proc->vpid].node_rank;
|
||||
|
38
orte/mca/ess/env/ess_env_module.c
поставляемый
38
orte/mca/ess/env/ess_env_module.c
поставляемый
@ -224,15 +224,15 @@ static uint8_t proc_get_locality(orte_process_name_t *proc)
|
||||
if (nid->daemon == ORTE_PROC_MY_DAEMON->vpid) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:env: proc %s on LOCAL NODE",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
return (OPAL_PROC_ON_NODE | OPAL_PROC_ON_CU | OPAL_PROC_ON_CLUSTER);
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:env: proc %s is REMOTE",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
|
||||
return OPAL_PROC_NON_LOCAL;
|
||||
|
||||
@ -248,9 +248,9 @@ static orte_vpid_t proc_get_daemon(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:env: proc %s is hosted by daemon %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
ORTE_VPID_PRINT(nid->daemon)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
orte_util_print_vpids(nid->daemon)));
|
||||
|
||||
return nid->daemon;
|
||||
}
|
||||
@ -266,8 +266,8 @@ static char* proc_get_hostname(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:env: proc %s is on host %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
nid->name));
|
||||
|
||||
return nid->name;
|
||||
@ -284,8 +284,8 @@ static uint32_t proc_get_arch(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:env: proc %s has arch %0x",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
nid->arch));
|
||||
|
||||
return nid->arch;
|
||||
@ -302,8 +302,8 @@ static int update_arch(orte_process_name_t *proc, uint32_t arch)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:env: updating proc %s to arch %0x",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
arch));
|
||||
|
||||
nid->arch = arch;
|
||||
@ -322,8 +322,8 @@ static orte_local_rank_t proc_get_local_rank(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:env: proc %s has local rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
(int)pmap->local_rank));
|
||||
|
||||
return pmap->local_rank;
|
||||
@ -339,8 +339,8 @@ static orte_node_rank_t proc_get_node_rank(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:env: proc %s has node rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
(int)pmap->node_rank));
|
||||
|
||||
return pmap->node_rank;
|
||||
@ -352,7 +352,7 @@ static int update_pidmap(opal_byte_object_t *bo)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:env: updating pidmap",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* build the pmap */
|
||||
if (ORTE_SUCCESS != (ret = orte_util_decode_pidmap(bo))) {
|
||||
@ -407,7 +407,7 @@ static int env_set_name(void)
|
||||
ORTE_PROC_MY_NAME->vpid = vpid;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
|
||||
"ess:env set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
"ess:env set name to %s", orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* get the non-name common environmental variables */
|
||||
if (ORTE_SUCCESS != (rc = orte_ess_env_get())) {
|
||||
|
@ -286,7 +286,7 @@ static int rte_init(char flags)
|
||||
/* setup my session directory */
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
|
||||
"%s setting up session dir with\n\ttmpdir: %s\n\thost %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(NULL == orte_proc_info.tmpdir_base) ? "UNDEF" : orte_proc_info.tmpdir_base,
|
||||
orte_proc_info.nodename));
|
||||
|
||||
@ -312,18 +312,18 @@ static int rte_init(char flags)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
|
||||
"%s writing contact file %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
contact_path));
|
||||
|
||||
if (ORTE_SUCCESS != (ret = orte_write_hnp_contact_file(contact_path))) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
|
||||
"%s writing contact file failed with error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
ORTE_ERROR_NAME(ret)));
|
||||
} else {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
|
||||
"%s wrote contact file",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
}
|
||||
free(contact_path);
|
||||
|
||||
@ -623,16 +623,16 @@ static uint8_t proc_get_locality(orte_process_name_t *proc)
|
||||
procs[i]->name.vpid == proc->vpid) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:hnp: proc %s is LOCAL",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
return (OPAL_PROC_ON_NODE | OPAL_PROC_ON_CU | OPAL_PROC_ON_CLUSTER);
|
||||
}
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:hnp: proc %s is REMOTE",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
|
||||
return OPAL_PROC_NON_LOCAL;
|
||||
|
||||
@ -667,9 +667,9 @@ static orte_vpid_t proc_get_daemon(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:hnp: proc %s is hosted by daemon %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
ORTE_VPID_PRINT(pdata->node->daemon->name.vpid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
orte_util_print_vpids(pdata->node->daemon->name.vpid)));
|
||||
|
||||
return pdata->node->daemon->name.vpid;
|
||||
}
|
||||
@ -685,8 +685,8 @@ static char* proc_get_hostname(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:hnp: proc %s is on host %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
pdata->node->name));
|
||||
|
||||
return pdata->node->name;
|
||||
@ -703,8 +703,8 @@ static uint32_t proc_get_arch(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:hnp: proc %s has arch %0x",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
pdata->node->arch));
|
||||
|
||||
return pdata->node->arch;
|
||||
@ -721,8 +721,8 @@ static int update_arch(orte_process_name_t *proc, uint32_t arch)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:hnp: updating proc %s to arch %0x",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
arch));
|
||||
|
||||
pdata->node->arch = arch;
|
||||
@ -741,8 +741,8 @@ static orte_local_rank_t proc_get_local_rank(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:hnp: proc %s has local rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
(int)pdata->local_rank));
|
||||
|
||||
return pdata->local_rank;
|
||||
@ -759,8 +759,8 @@ static orte_node_rank_t proc_get_node_rank(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:hnp: proc %s has node rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
(int)pdata->node_rank));
|
||||
|
||||
return pdata->node_rank;
|
||||
|
@ -184,15 +184,15 @@ static uint8_t proc_get_locality(orte_process_name_t *proc)
|
||||
if (nid->daemon == ORTE_PROC_MY_DAEMON->vpid) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:lsf: proc %s is LOCAL",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
return (OPAL_PROC_ON_NODE | OPAL_PROC_ON_CU | OPAL_PROC_ON_CLUSTER);
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:lsf: proc %s is REMOTE",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
|
||||
return OPAL_PROC_NON_LOCAL;
|
||||
|
||||
@ -208,9 +208,9 @@ static orte_vpid_t proc_get_daemon(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:lsf: proc %s is hosted by daemon %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
ORTE_VPID_PRINT(nid->daemon)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
orte_util_print_vpids(nid->daemon)));
|
||||
|
||||
return nid->daemon;
|
||||
}
|
||||
@ -226,8 +226,8 @@ static char* proc_get_hostname(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:lsf: proc %s is on host %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
nid->name));
|
||||
|
||||
return nid->name;
|
||||
@ -244,8 +244,8 @@ static uint32_t proc_get_arch(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:lsf: proc %s has arch %0x",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
nid->arch));
|
||||
|
||||
return nid->arch;
|
||||
@ -262,8 +262,8 @@ static int update_arch(orte_process_name_t *proc, uint32_t arch)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:lsf: updating proc %s to arch %0x",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
arch));
|
||||
|
||||
nid->arch = arch;
|
||||
@ -282,8 +282,8 @@ static orte_local_rank_t proc_get_local_rank(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:lsf: proc %s has local rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
(int)pmap->local_rank));
|
||||
|
||||
return pmap->local_rank;
|
||||
@ -300,8 +300,8 @@ static orte_node_rank_t proc_get_node_rank(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:lsf: proc %s has node rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
(int)pmap->node_rank));
|
||||
|
||||
return pmap->node_rank;
|
||||
|
@ -398,15 +398,15 @@ static uint8_t proc_get_locality(orte_process_name_t *proc)
|
||||
if (nid->daemon == ORTE_PROC_MY_DAEMON->vpid) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:singleton: proc %s is LOCAL",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
return (OPAL_PROC_ON_NODE | OPAL_PROC_ON_CU | OPAL_PROC_ON_CLUSTER);
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:singleton: proc %s is REMOTE",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
|
||||
return OPAL_PROC_NON_LOCAL;
|
||||
|
||||
@ -422,9 +422,9 @@ static orte_vpid_t proc_get_daemon(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:singleton: proc %s is hosted by daemon %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
ORTE_VPID_PRINT(nid->daemon)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
orte_util_print_vpids(nid->daemon)));
|
||||
|
||||
return nid->daemon;
|
||||
}
|
||||
@ -440,8 +440,8 @@ static char* proc_get_hostname(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:singleton: proc %s is on host %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
nid->name));
|
||||
|
||||
return nid->name;
|
||||
@ -458,8 +458,8 @@ static uint32_t proc_get_arch(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:singleton: proc %s has arch %0x",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
nid->arch));
|
||||
|
||||
return nid->arch;
|
||||
@ -476,8 +476,8 @@ static int update_arch(orte_process_name_t *proc, uint32_t arch)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:singleton: updating proc %s to arch %0x",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
arch));
|
||||
|
||||
nid->arch = arch;
|
||||
@ -496,8 +496,8 @@ static orte_local_rank_t proc_get_local_rank(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:singleton: proc %s has local rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
(int)pmap->local_rank));
|
||||
|
||||
return pmap->local_rank;
|
||||
@ -514,8 +514,8 @@ static orte_node_rank_t proc_get_node_rank(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:singleton: proc %s has node rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
(int)pmap->node_rank));
|
||||
|
||||
return pmap->node_rank;
|
||||
|
@ -185,8 +185,8 @@ static uint8_t proc_get_locality(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slave: proc %s is REMOTE",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
|
||||
return OPAL_PROC_NON_LOCAL;
|
||||
|
||||
@ -303,7 +303,7 @@ static int slave_set_name(void)
|
||||
ORTE_PROC_MY_NAME->vpid = vpid;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
|
||||
"ess:slave set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
"ess:slave set name to %s", orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* get the non-name common environmental variables */
|
||||
if (ORTE_SUCCESS != (rc = orte_ess_env_get())) {
|
||||
|
@ -221,15 +221,15 @@ static uint8_t proc_get_locality(orte_process_name_t *proc)
|
||||
if (nid->daemon == ORTE_PROC_MY_DAEMON->vpid) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurm: proc %s is LOCAL",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
return (OPAL_PROC_ON_NODE | OPAL_PROC_ON_CU | OPAL_PROC_ON_CLUSTER);
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurm: proc %s is REMOTE",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
|
||||
return OPAL_PROC_NON_LOCAL;
|
||||
|
||||
@ -249,9 +249,9 @@ static orte_vpid_t proc_get_daemon(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurm: proc %s is hosted by daemon %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
ORTE_VPID_PRINT(nid->daemon)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
orte_util_print_vpids(nid->daemon)));
|
||||
|
||||
return nid->daemon;
|
||||
}
|
||||
@ -267,8 +267,8 @@ static char* proc_get_hostname(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurm: proc %s is on host %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
nid->name));
|
||||
|
||||
return nid->name;
|
||||
@ -285,8 +285,8 @@ static uint32_t proc_get_arch(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurm: proc %s has arch %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
nid->arch));
|
||||
|
||||
return nid->arch;
|
||||
@ -303,8 +303,8 @@ static int update_arch(orte_process_name_t *proc, uint32_t arch)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurm: updating proc %s to arch %0x",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
arch));
|
||||
|
||||
nid->arch = arch;
|
||||
@ -323,8 +323,8 @@ static orte_local_rank_t proc_get_local_rank(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurm: proc %s has local rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
(int)pmap->local_rank));
|
||||
|
||||
return pmap->local_rank;
|
||||
@ -340,8 +340,8 @@ static orte_node_rank_t proc_get_node_rank(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurm: proc %s has node rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
(int)pmap->node_rank));
|
||||
|
||||
return pmap->node_rank;
|
||||
@ -353,7 +353,7 @@ static int update_pidmap(opal_byte_object_t *bo)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurm: updating pidmap",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* build the pmap */
|
||||
if (ORTE_SUCCESS != (ret = orte_util_decode_pidmap(bo))) {
|
||||
@ -417,7 +417,7 @@ static int slurm_set_name(void)
|
||||
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
|
||||
"ess:slurm set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
"ess:slurm set name to %s", orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* fix up the system info nodename to match exactly what slurm returned */
|
||||
if (NULL != orte_proc_info.nodename) {
|
||||
@ -533,7 +533,7 @@ static int build_daemon_nidmap(void)
|
||||
*/
|
||||
opal_pointer_array_set_item(&orte_nidmap, node->daemon, node);
|
||||
|
||||
opal_output(0, "%s lookup address for node %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name);
|
||||
opal_output(0, "%s lookup address for node %s", orte_util_print_name_args(ORTE_PROC_MY_NAME), node->name);
|
||||
/* lookup the address of this node */
|
||||
if (NULL == (h = gethostbyname(node->name))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
|
@ -160,7 +160,7 @@ static int rte_init(char flags)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
|
||||
"%s local rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
local_rank));
|
||||
|
||||
/* get the number of procs in this job */
|
||||
@ -293,7 +293,7 @@ static int rte_init(char flags)
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
|
||||
"%s node %d name %s rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(int) node->index, node->name, (int)vpid));
|
||||
vpid++;
|
||||
}
|
||||
@ -316,7 +316,7 @@ static int rte_init(char flags)
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
|
||||
"%s node %d name %s rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(int) node->index, node->name, (int)vpid));
|
||||
vpid++;
|
||||
--ppn[i];
|
||||
@ -382,15 +382,15 @@ static uint8_t proc_get_locality(orte_process_name_t *proc)
|
||||
if (nid->daemon == ORTE_PROC_MY_DAEMON->vpid) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurmd: proc %s is LOCAL",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
return (OPAL_PROC_ON_NODE | OPAL_PROC_ON_CU | OPAL_PROC_ON_CLUSTER);
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurmd: proc %s is REMOTE",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
|
||||
return OPAL_PROC_NON_LOCAL;
|
||||
|
||||
@ -410,9 +410,9 @@ static orte_vpid_t proc_get_daemon(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurmd: proc %s is hosted by daemon %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
ORTE_VPID_PRINT(nid->daemon)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
orte_util_print_vpids(nid->daemon)));
|
||||
|
||||
return nid->daemon;
|
||||
}
|
||||
@ -428,8 +428,8 @@ static char* proc_get_hostname(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurmd: proc %s is on host %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
nid->name));
|
||||
|
||||
return nid->name;
|
||||
@ -446,8 +446,8 @@ static uint32_t proc_get_arch(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurmd: proc %s has arch %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
nid->arch));
|
||||
|
||||
return nid->arch;
|
||||
@ -464,8 +464,8 @@ static int update_arch(orte_process_name_t *proc, uint32_t arch)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurmd: updating proc %s to arch %0x",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
arch));
|
||||
|
||||
nid->arch = arch;
|
||||
@ -484,8 +484,8 @@ static orte_local_rank_t proc_get_local_rank(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurmd: proc %s has local rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
(int)pmap->local_rank));
|
||||
|
||||
return pmap->local_rank;
|
||||
@ -501,8 +501,8 @@ static orte_node_rank_t proc_get_node_rank(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurmd: proc %s has node rank %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc),
|
||||
(int)pmap->node_rank));
|
||||
|
||||
return pmap->node_rank;
|
||||
@ -514,7 +514,7 @@ static int update_pidmap(opal_byte_object_t *bo)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
|
||||
"%s ess:slurmd: updating pidmap",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* build the pmap */
|
||||
if (ORTE_SUCCESS != (ret = orte_util_decode_pidmap(bo))) {
|
||||
|
@ -77,7 +77,7 @@ int orte_filem_base_comm_start(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_filem_base_output,
|
||||
"%s filem:base: Receive: Start command recv",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
|
||||
ORTE_RML_TAG_FILEM_BASE,
|
||||
@ -107,7 +107,7 @@ int orte_filem_base_comm_stop(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_filem_base_output,
|
||||
"%s filem:base:receive stop comm",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_FILEM_BASE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -133,7 +133,7 @@ void orte_filem_base_recv(int status, orte_process_name_t* sender,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_filem_base_output,
|
||||
"%s filem:base: Receive a command message.",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
count = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &command, &count, ORTE_FILEM_CMD))) {
|
||||
@ -145,7 +145,7 @@ void orte_filem_base_recv(int status, orte_process_name_t* sender,
|
||||
case ORTE_FILEM_GET_PROC_NODE_NAME_CMD:
|
||||
OPAL_OUTPUT_VERBOSE((10, orte_filem_base_output,
|
||||
"%s filem:base: Command: Get Proc node name command",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
filem_base_process_get_proc_node_name_cmd(sender, buffer);
|
||||
break;
|
||||
@ -153,7 +153,7 @@ void orte_filem_base_recv(int status, orte_process_name_t* sender,
|
||||
case ORTE_FILEM_GET_REMOTE_PATH_CMD:
|
||||
OPAL_OUTPUT_VERBOSE((10, orte_filem_base_output,
|
||||
"%s filem:base: Command: Get remote path command",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
filem_base_process_get_remote_path_cmd(sender, buffer);
|
||||
break;
|
||||
@ -264,8 +264,8 @@ static void filem_base_process_get_remote_path_cmd(orte_process_name_t* sender,
|
||||
|
||||
opal_output_verbose(10, orte_filem_base_output,
|
||||
"filem:base: process_get_remote_path_cmd: %s -> %s: Filename Requested (%s) translated to (%s)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(sender),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(sender),
|
||||
filename, tmp_name);
|
||||
|
||||
/*
|
||||
|
@ -616,8 +616,8 @@ static int orte_filem_rsh_start_copy(orte_filem_base_request_t *request) {
|
||||
if( 0 != access(f_set->local_target, R_OK) ) {
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle,
|
||||
"filem:rsh: copy(): %s -> %s: Error: Cannot move file %s to %s. Does not exist at source\n",
|
||||
ORTE_NAME_PRINT(&p_set->source),
|
||||
ORTE_NAME_PRINT(&p_set->sink),
|
||||
orte_util_print_name_args(&p_set->source),
|
||||
orte_util_print_name_args(&p_set->sink),
|
||||
f_set->local_target,
|
||||
f_set->remote_target));
|
||||
orte_show_help("help-orte-filem-rsh.txt",
|
||||
@ -639,8 +639,8 @@ static int orte_filem_rsh_start_copy(orte_filem_base_request_t *request) {
|
||||
if( 0 == access(f_set->local_target, R_OK) ) {
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle,
|
||||
"filem:rsh: copy(): %s -> %s: Error: Cannot move file %s to %s. Already exists at destination\n",
|
||||
ORTE_NAME_PRINT(&p_set->source),
|
||||
ORTE_NAME_PRINT(&p_set->sink),
|
||||
orte_util_print_name_args(&p_set->source),
|
||||
orte_util_print_name_args(&p_set->sink),
|
||||
f_set->remote_target,
|
||||
f_set->local_target));
|
||||
orte_show_help("help-orte-filem-rsh.txt",
|
||||
@ -656,15 +656,15 @@ static int orte_filem_rsh_start_copy(orte_filem_base_request_t *request) {
|
||||
if( request->movement_type == ORTE_FILEM_MOVE_TYPE_PUT ) {
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle,
|
||||
"filem:rsh: copy(): %s -> %s: Moving file %s to %s\n",
|
||||
ORTE_NAME_PRINT(&p_set->source),
|
||||
ORTE_NAME_PRINT(&p_set->sink),
|
||||
orte_util_print_name_args(&p_set->source),
|
||||
orte_util_print_name_args(&p_set->sink),
|
||||
f_set->local_target,
|
||||
f_set->remote_target));
|
||||
} else {
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle,
|
||||
"filem:rsh: copy(): %s -> %s: Moving file %s to %s\n",
|
||||
ORTE_NAME_PRINT(&p_set->source),
|
||||
ORTE_NAME_PRINT(&p_set->sink),
|
||||
orte_util_print_name_args(&p_set->source),
|
||||
orte_util_print_name_args(&p_set->sink),
|
||||
f_set->remote_target,
|
||||
f_set->local_target));
|
||||
}
|
||||
@ -674,8 +674,8 @@ static int orte_filem_rsh_start_copy(orte_filem_base_request_t *request) {
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle,
|
||||
"filem:rsh: copy(): %s -> %s: Get node name.\n",
|
||||
ORTE_NAME_PRINT(&p_set->source),
|
||||
ORTE_NAME_PRINT(&p_set->sink)));
|
||||
orte_util_print_name_args(&p_set->source),
|
||||
orte_util_print_name_args(&p_set->sink)));
|
||||
if( ORTE_SUCCESS != (ret = orte_filem_base_get_proc_node_name(&p_set->source, &remote_machine))) {
|
||||
opal_output(mca_filem_rsh_component.super.output_handle,
|
||||
"filem:rsh: copy(): Get Node Name failed (%d)", ret);
|
||||
@ -684,8 +684,8 @@ static int orte_filem_rsh_start_copy(orte_filem_base_request_t *request) {
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle,
|
||||
"filem:rsh: copy(): %s -> %s: Got node name: %s\n",
|
||||
ORTE_NAME_PRINT(&p_set->source),
|
||||
ORTE_NAME_PRINT(&p_set->sink),
|
||||
orte_util_print_name_args(&p_set->source),
|
||||
orte_util_print_name_args(&p_set->sink),
|
||||
remote_machine));
|
||||
|
||||
/*
|
||||
@ -695,8 +695,8 @@ static int orte_filem_rsh_start_copy(orte_filem_base_request_t *request) {
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle,
|
||||
"filem:rsh: copy(): %s -> %s: Query remote path (%s).\n",
|
||||
ORTE_NAME_PRINT(&p_set->source),
|
||||
ORTE_NAME_PRINT(&p_set->sink),
|
||||
orte_util_print_name_args(&p_set->source),
|
||||
orte_util_print_name_args(&p_set->sink),
|
||||
f_set->remote_target));
|
||||
remote_file = strdup(f_set->remote_target);
|
||||
if( ORTE_SUCCESS != (ret = orte_filem_rsh_query_remote_path(&remote_file, &p_set->source, &f_set->target_flag) ) ) {
|
||||
@ -707,8 +707,8 @@ static int orte_filem_rsh_start_copy(orte_filem_base_request_t *request) {
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle,
|
||||
"filem:rsh: copy(): %s -> %s: Remote path (%s) is (%s).\n",
|
||||
ORTE_NAME_PRINT(&p_set->source),
|
||||
ORTE_NAME_PRINT(&p_set->sink),
|
||||
orte_util_print_name_args(&p_set->source),
|
||||
orte_util_print_name_args(&p_set->sink),
|
||||
f_set->remote_target,
|
||||
remote_file));
|
||||
|
||||
@ -983,7 +983,7 @@ static int orte_filem_rsh_start_command(orte_filem_base_process_set_t *proc_set
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle,
|
||||
"filem:rsh: start_command(): Ask permission to send from proc %s",
|
||||
ORTE_NAME_PRINT(&(proc_set->source))));
|
||||
orte_util_print_name_args(&(proc_set->source))));
|
||||
if( ORTE_SUCCESS != (ret = orte_filem_rsh_permission_ask(&(proc_set->source), 1)) ) {
|
||||
return ret;
|
||||
}
|
||||
@ -1164,7 +1164,7 @@ static void orte_filem_rsh_permission_callback(int status,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle,
|
||||
"filem:rsh: permission_callback(? ?): Peer %s ...",
|
||||
ORTE_NAME_PRINT(sender)));
|
||||
orte_util_print_name_args(sender)));
|
||||
|
||||
/*
|
||||
* Receive the flag indicating if this is:
|
||||
@ -1180,7 +1180,7 @@ static void orte_filem_rsh_permission_callback(int status,
|
||||
if( ORTE_FILEM_RSH_ASK == perm_flag ) {
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle,
|
||||
"filem:rsh: permission_callback(ASK): Peer %s Asking permission to send [Used %d of %d]",
|
||||
ORTE_NAME_PRINT(sender),
|
||||
orte_util_print_name_args(sender),
|
||||
cur_num_incomming,
|
||||
orte_filem_rsh_max_incomming));
|
||||
|
||||
@ -1202,7 +1202,7 @@ static void orte_filem_rsh_permission_callback(int status,
|
||||
/* Add to the waiting list */
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle,
|
||||
"filem:rsh: permission_callback(ASK): Add Peer %s request to waiting list",
|
||||
ORTE_NAME_PRINT(sender)));
|
||||
orte_util_print_name_args(sender)));
|
||||
|
||||
wp_item = OBJ_NEW(orte_filem_rsh_work_pool_item_t);
|
||||
wp_item->proc_set.source.jobid = sender->jobid;
|
||||
@ -1217,7 +1217,7 @@ static void orte_filem_rsh_permission_callback(int status,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle,
|
||||
"filem:rsh: permission_callback(ASK): Respond to Peer %s with %d",
|
||||
ORTE_NAME_PRINT(sender), num_allowed));
|
||||
orte_util_print_name_args(sender), num_allowed));
|
||||
|
||||
permission_send_num_allowed(sender, num_allowed);
|
||||
}
|
||||
@ -1226,7 +1226,7 @@ static void orte_filem_rsh_permission_callback(int status,
|
||||
else if( ORTE_FILEM_RSH_ALLOW == perm_flag ) {
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle,
|
||||
"filem:rsh: permission_callback(ALLOW): Peer %s Allowing me to send",
|
||||
ORTE_NAME_PRINT(sender)));
|
||||
orte_util_print_name_args(sender)));
|
||||
|
||||
/*
|
||||
* Receive the allowed transmit amount
|
||||
@ -1245,7 +1245,7 @@ static void orte_filem_rsh_permission_callback(int status,
|
||||
if( 0 >= opal_list_get_size(&work_pool_pending) ) {
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle,
|
||||
"filem:rsh: permission_callback(ALLOW): No more pending sends to peer %s...",
|
||||
ORTE_NAME_PRINT(sender)));
|
||||
orte_util_print_name_args(sender)));
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1267,7 +1267,7 @@ static void orte_filem_rsh_permission_callback(int status,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle,
|
||||
"filem:rsh: permission_callback(ALLOW): Starting to send to peer %s... (# pending = %d)",
|
||||
ORTE_NAME_PRINT(sender), (int)opal_list_get_size(&work_pool_pending)));
|
||||
orte_util_print_name_args(sender), (int)opal_list_get_size(&work_pool_pending)));
|
||||
wp_item->active = true;
|
||||
opal_list_append(&work_pool_active, &(wp_item->super));
|
||||
if( ORTE_SUCCESS != (ret = start_child(wp_item->command,
|
||||
@ -1281,7 +1281,7 @@ static void orte_filem_rsh_permission_callback(int status,
|
||||
else if( ORTE_FILEM_RSH_DONE == perm_flag ) {
|
||||
OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle,
|
||||
"filem:rsh: permission_callback(DONE): Peer %s is done sending to me",
|
||||
ORTE_NAME_PRINT(sender)));
|
||||
orte_util_print_name_args(sender)));
|
||||
|
||||
/*
|
||||
* Receive the number of open slots
|
||||
@ -1326,7 +1326,7 @@ static void orte_filem_rsh_permission_callback(int status,
|
||||
|
||||
orte_show_help("help-orte-filem-rsh.txt",
|
||||
"orte-filem-rsh:remote-get-failed",
|
||||
true, ORTE_NAME_PRINT(sender), peer_status,
|
||||
true, orte_util_print_name_args(sender), peer_status,
|
||||
local_target,
|
||||
remote_target,
|
||||
remote_cmd);
|
||||
|
@ -132,8 +132,8 @@ static int xcast(orte_jobid_t job,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad:xcast sent to job %s tag %ld",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(job), (long)tag));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(job), (long)tag));
|
||||
|
||||
/* if there is no message to send, then just return ok */
|
||||
if (NULL == buffer) {
|
||||
@ -238,7 +238,7 @@ static int barrier(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad entering barrier",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* everyone sends barrier to local daemon */
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
@ -258,7 +258,7 @@ static int barrier(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad barrier sent",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* now receive the release. Be sure to do this in
|
||||
* a manner that allows us to return without being in a recv!
|
||||
@ -275,7 +275,7 @@ static int barrier(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad received barrier release",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
@ -304,7 +304,7 @@ static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad entering allgather",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* everyone sends data to their local daemon */
|
||||
OBJ_CONSTRUCT(&coll, opal_buffer_t);
|
||||
@ -330,7 +330,7 @@ static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad allgather buffer sent",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* setup the buffer that will recv the results */
|
||||
allgather_buf = OBJ_NEW(opal_buffer_t);
|
||||
@ -358,7 +358,7 @@ static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad allgather completed",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
@ -371,7 +371,7 @@ static int modex(opal_list_t *procs)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad: modex entered",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
if (NULL == procs) {
|
||||
if (ORTE_SUCCESS != (rc = orte_grpcomm_base_peer_modex(true))) {
|
||||
@ -385,7 +385,7 @@ static int modex(opal_list_t *procs)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad: modex completed",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
return rc;
|
||||
}
|
||||
@ -430,7 +430,7 @@ static int daemon_collective(orte_process_name_t *sender, opal_buffer_t *data)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s odls: daemon collective called",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* unpack the jobid using this collective */
|
||||
n = 1;
|
||||
@ -535,8 +535,8 @@ static int daemon_collective(orte_process_name_t *sender, opal_buffer_t *data)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad: daemon collective for job %s from %s type %ld num_collected %d num_participating %d num_contributors %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jobid),
|
||||
ORTE_NAME_PRINT(sender),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), orte_util_print_jobids(jobid),
|
||||
orte_util_print_name_args(sender),
|
||||
(long)jobdat->collective_type, jobdat->num_collected,
|
||||
jobdat->num_participating, jobdat->num_contributors));
|
||||
|
||||
@ -575,8 +575,8 @@ static int daemon_collective(orte_process_name_t *sender, opal_buffer_t *data)
|
||||
my_parent.vpid = orte_routed.get_routing_tree(NULL);
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad: daemon collective not the HNP - sending to parent %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&my_parent)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&my_parent)));
|
||||
if (0 > (rc = orte_rml.send_buffer(&my_parent, &buf, ORTE_RML_TAG_DAEMON_COLLECTIVE, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
@ -588,8 +588,8 @@ static int daemon_collective(orte_process_name_t *sender, opal_buffer_t *data)
|
||||
hnp_process:
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad: daemon collective HNP - xcasting to job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jobid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jobid)));
|
||||
/* setup a buffer to send the results back to the job members */
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
|
||||
@ -688,8 +688,8 @@ static void process_msg(int fd, short event, void *data)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad: collecting data from child %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(child->name)));
|
||||
|
||||
found = true;
|
||||
break;
|
||||
@ -750,7 +750,7 @@ static void process_msg(int fd, short event, void *data)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad: executing collective",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* prep a buffer to pass it all along */
|
||||
OBJ_CONSTRUCT(&relay, opal_buffer_t);
|
||||
@ -780,7 +780,7 @@ static void process_msg(int fd, short event, void *data)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad: collective completed",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
}
|
||||
|
||||
CLEANUP:
|
||||
@ -796,8 +796,8 @@ static void daemon_coll_recv(int status, orte_process_name_t* sender,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad:receive got message from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(sender)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(sender)));
|
||||
|
||||
/* don't process this right away - we need to get out of the recv before
|
||||
* we process the message as it may ask us to do something that involves
|
||||
|
@ -52,8 +52,8 @@ static void allgather_server_recv(int status, orte_process_name_t* sender,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
|
||||
"%s allgather buffer received from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(sender)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(sender)));
|
||||
|
||||
/* append this data to the allgather_buf */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(allgather_buf, buffer))) {
|
||||
@ -82,7 +82,7 @@ static void allgather_client_recv(int status, orte_process_name_t* sender,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
|
||||
"%s grpcomm:base: allgather buffer received",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* transfer the buffer */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(allgather_buf, buffer))) {
|
||||
@ -113,7 +113,7 @@ int orte_grpcomm_base_allgather_list(opal_list_t *names, opal_buffer_t *sbuf, op
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm: entering allgather_list",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* the first entry on the list is the "root" that collects
|
||||
* all the data - everyone else just sends and gets back
|
||||
@ -126,8 +126,8 @@ int orte_grpcomm_base_allgather_list(opal_list_t *names, opal_buffer_t *sbuf, op
|
||||
/* everyone but root sends data */
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
|
||||
"%s allgather_list: sending my data to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&root->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&root->name)));
|
||||
|
||||
if (0 > orte_rml.send_buffer(&root->name, sbuf, ORTE_RML_TAG_ALLGATHER_LIST, 0)) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
|
||||
@ -136,7 +136,7 @@ int orte_grpcomm_base_allgather_list(opal_list_t *names, opal_buffer_t *sbuf, op
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
|
||||
"%s allgather_list: buffer sent",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* setup the buffer that will recv the results */
|
||||
allgather_buf = OBJ_NEW(opal_buffer_t);
|
||||
@ -172,7 +172,7 @@ int orte_grpcomm_base_allgather_list(opal_list_t *names, opal_buffer_t *sbuf, op
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
|
||||
"%s allgather_list: buffer received",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
@ -203,7 +203,7 @@ int orte_grpcomm_base_allgather_list(opal_list_t *names, opal_buffer_t *sbuf, op
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
|
||||
"%s allgather_list: waiting to recv %ld inputs",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(long)num_peers-1));
|
||||
|
||||
/* post the non-blocking recv */
|
||||
@ -225,7 +225,7 @@ int orte_grpcomm_base_allgather_list(opal_list_t *names, opal_buffer_t *sbuf, op
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
|
||||
"%s allgather_list: received all data",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* copy the received info to the caller's buffer */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(rbuf, allgather_buf))) {
|
||||
@ -259,7 +259,7 @@ int orte_grpcomm_base_allgather_list(opal_list_t *names, opal_buffer_t *sbuf, op
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm: allgather_list completed",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
@ -83,8 +83,8 @@ void orte_grpcomm_base_coll_recv(int status, orte_process_name_t* sender,
|
||||
{
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:coll:receive got message from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(sender)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(sender)));
|
||||
|
||||
/* don't process this right away - we need to get out of the recv before
|
||||
* we process the message as it may ask us to do something that involves
|
||||
@ -111,7 +111,7 @@ int orte_grpcomm_base_allgather(opal_buffer_t *sendbuf, opal_buffer_t *recvbuf,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:coll:allgather called with %d entries np %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
num_entries, (int)np));
|
||||
|
||||
/* if we only have one proc participating, just copy the data across and return */
|
||||
@ -162,7 +162,7 @@ static int twoproc(opal_buffer_t *sendbuf, opal_buffer_t *recvbuf, int32_t num_e
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:coll:two-proc algo employed",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
if (vpids[0] == ORTE_PROC_MY_NAME->vpid) {
|
||||
/* I send first */
|
||||
@ -175,8 +175,8 @@ static int twoproc(opal_buffer_t *sendbuf, opal_buffer_t *recvbuf, int32_t num_e
|
||||
opal_dss.copy_payload(&buf, sendbuf);
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:coll:two-proc sending to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&peer)));
|
||||
|
||||
if (0 > (rc = orte_rml.send_buffer(&peer, &buf, ORTE_RML_TAG_DAEMON_COLLECTIVE, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -198,7 +198,7 @@ static int twoproc(opal_buffer_t *sendbuf, opal_buffer_t *recvbuf, int32_t num_e
|
||||
ORTE_PROGRESSED_WAIT(false, num_recvd, 1);
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:coll:two-proc got my return message",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
} else {
|
||||
/* if I am not the start, then I recv first */
|
||||
@ -215,7 +215,7 @@ static int twoproc(opal_buffer_t *sendbuf, opal_buffer_t *recvbuf, int32_t num_e
|
||||
ORTE_PROGRESSED_WAIT(false, num_recvd, 1);
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:coll:two-proc got my starting message",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* send my data back */
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
@ -224,8 +224,8 @@ static int twoproc(opal_buffer_t *sendbuf, opal_buffer_t *recvbuf, int32_t num_e
|
||||
peer.vpid = vpids[0];
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:coll:two-proc sending to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&peer)));
|
||||
if (0 > (rc = orte_rml.send_buffer(&peer, &buf, ORTE_RML_TAG_DAEMON_COLLECTIVE, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
@ -273,7 +273,7 @@ static int bruck(opal_buffer_t *sendbuf, opal_buffer_t *recvbuf, int32_t num_ent
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:coll:bruck algo employed",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* initialize */
|
||||
total_entries = num_entries;
|
||||
@ -318,8 +318,8 @@ static int bruck(opal_buffer_t *sendbuf, opal_buffer_t *recvbuf, int32_t num_ent
|
||||
opal_dss.copy_payload(&buf, &collection);
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:coll:bruck sending to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&peer)));
|
||||
if (0 > (rc = orte_rml.send_buffer(&peer, &buf, ORTE_RML_TAG_DAEMON_COLLECTIVE, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
@ -388,7 +388,7 @@ static int recursivedoubling(opal_buffer_t *sendbuf, opal_buffer_t *recvbuf, int
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:coll:recdub algo employed",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* initialize */
|
||||
total_entries = num_entries;
|
||||
@ -431,8 +431,8 @@ static int recursivedoubling(opal_buffer_t *sendbuf, opal_buffer_t *recvbuf, int
|
||||
opal_dss.copy_payload(&buf, &collection);
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:coll:recdub sending to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&peer)));
|
||||
if (0 > (rc = orte_rml.send_buffer(&peer, &buf, ORTE_RML_TAG_DAEMON_COLLECTIVE, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
|
@ -66,7 +66,7 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:base:full:modex: performing modex",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* setup the buffer that will actually be sent */
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
@ -118,7 +118,7 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
|
||||
"%s grpcomm:base:full:modex: executing allgather",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* exchange the buffer with the list of peers */
|
||||
if (ORTE_SUCCESS != (rc = orte_grpcomm.allgather_list(procs, &buf, &rbuf))) {
|
||||
@ -128,7 +128,7 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
|
||||
"%s grpcomm:base:full:modex: processing modex info",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* process the results */
|
||||
nids = (orte_nid_t**)orte_nidmap.addr;
|
||||
@ -142,7 +142,7 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:base:full:modex: received %ld data bytes from %d procs",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(long)(rbuf.pack_ptr - rbuf.unpack_ptr), num_procs));
|
||||
|
||||
/* if the buffer doesn't have any more data, ignore it */
|
||||
@ -212,7 +212,7 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
|
||||
/* node wasn't found - let's add it */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:base:full:modex no nidmap entry for node %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostname));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), hostname));
|
||||
nid = OBJ_NEW(orte_nid_t);
|
||||
nid->name = strdup(hostname);
|
||||
nid->daemon = daemon;
|
||||
@ -225,8 +225,8 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
|
||||
/* proc wasn't found - let's add it */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:base:full:modex no jobmap entry for job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(proc_name.jobid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(proc_name.jobid)));
|
||||
jmap = OBJ_NEW(orte_jmap_t);
|
||||
jmap->job = proc_name.jobid;
|
||||
opal_pointer_array_add(&orte_jobmap, jmap);
|
||||
@ -243,8 +243,8 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
|
||||
/* proc wasn't found - let's add it */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:base:full:modex no pidmap entry for proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&proc_name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&proc_name)));
|
||||
pmap = OBJ_NEW(orte_pmap_t);
|
||||
pmap->node = nid->index;
|
||||
pmap->local_rank = local_rank;
|
||||
@ -255,8 +255,8 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:base:full:modex: adding modex entry for proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&proc_name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&proc_name)));
|
||||
|
||||
/* UPDATE THE MODEX INFO FOR THIS PROC */
|
||||
|
||||
@ -276,8 +276,8 @@ int orte_grpcomm_base_full_modex(opal_list_t *procs, bool modex_db)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:base:full:modex adding %d entries for proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_recvd_entries,
|
||||
ORTE_NAME_PRINT(&proc_name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), num_recvd_entries,
|
||||
orte_util_print_name_args(&proc_name)));
|
||||
|
||||
/*
|
||||
* Extract the attribute names and values
|
||||
@ -338,7 +338,7 @@ int orte_grpcomm_base_peer_modex(bool modex_db)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:base:peer:modex: performing modex",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* setup the buffer that will actually be sent */
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
@ -363,7 +363,7 @@ int orte_grpcomm_base_peer_modex(bool modex_db)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
|
||||
"%s grpcomm:base:peer:modex: executing allgather",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* exchange the buffer with my peers */
|
||||
if (ORTE_SUCCESS != (rc = orte_grpcomm.allgather(&buf, &rbuf))) {
|
||||
@ -373,7 +373,7 @@ int orte_grpcomm_base_peer_modex(bool modex_db)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
|
||||
"%s grpcomm:base:peer:modex: processing modex info",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* process the results */
|
||||
/* extract the number of procs that put data in the buffer */
|
||||
@ -385,7 +385,7 @@ int orte_grpcomm_base_peer_modex(bool modex_db)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:base:peer:modex: received %ld data bytes from %d procs",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(long)(rbuf.pack_ptr - rbuf.unpack_ptr), num_procs));
|
||||
|
||||
/* if the buffer doesn't have any more data, ignore it */
|
||||
@ -419,8 +419,8 @@ int orte_grpcomm_base_peer_modex(bool modex_db)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:base:peer:modex: adding modex entry for proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&proc_name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&proc_name)));
|
||||
|
||||
if (modex_db) {
|
||||
/* if we are using the modex db, pass the rest of the buffer
|
||||
@ -440,16 +440,16 @@ int orte_grpcomm_base_peer_modex(bool modex_db)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:base:peer:modex adding %d entries for proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_recvd_entries,
|
||||
ORTE_NAME_PRINT(&proc_name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), num_recvd_entries,
|
||||
orte_util_print_name_args(&proc_name)));
|
||||
|
||||
/* find this proc's node in the nidmap */
|
||||
if (NULL == (nid = orte_util_lookup_nid(&proc_name))) {
|
||||
/* proc wasn't found - return error */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:base:peer:modex no nidmap entry for proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&proc_name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&proc_name)));
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
rc = ORTE_ERR_NOT_FOUND;
|
||||
goto cleanup;
|
||||
@ -746,7 +746,7 @@ int orte_grpcomm_base_set_proc_attr(const char *attr_name,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:set_proc_attr: setting attribute %s data size %lu",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
attr_name, (unsigned long)size));
|
||||
|
||||
/* Pack the attribute name information into the local buffer */
|
||||
@ -787,15 +787,15 @@ int orte_grpcomm_base_get_proc_attr(const orte_process_name_t proc,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:get_proc_attr: searching for attr %s on proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), attribute_name,
|
||||
ORTE_NAME_PRINT(&proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), attribute_name,
|
||||
orte_util_print_name_args(&proc)));
|
||||
|
||||
proc_data = modex_lookup_orte_proc(&proc);
|
||||
if (NULL == proc_data) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:get_proc_attr: no modex entry for proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&proc)));
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
@ -809,8 +809,8 @@ int orte_grpcomm_base_get_proc_attr(const orte_process_name_t proc,
|
||||
(attr_data->attr_data_size == 0)) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:get_proc_attr: no attr avail or zero byte size for proc %s attribute %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&proc), attribute_name));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&proc), attribute_name));
|
||||
*val = NULL;
|
||||
*size = 0;
|
||||
} else {
|
||||
@ -825,8 +825,8 @@ int orte_grpcomm_base_get_proc_attr(const orte_process_name_t proc,
|
||||
*size = attr_data->attr_data_size;
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:get_proc_attr: found %d bytes for attr %s on proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)attr_data->attr_data_size,
|
||||
attribute_name, ORTE_NAME_PRINT(&proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), (int)attr_data->attr_data_size,
|
||||
attribute_name, orte_util_print_name_args(&proc)));
|
||||
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&proc_data->modex_lock);
|
||||
@ -861,7 +861,7 @@ int orte_grpcomm_base_pack_modex_entries(opal_buffer_t *buf, bool *mdx_reqd)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:base:pack_modex: reporting %ld entries",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(long)num_entries));
|
||||
|
||||
/* put the number of entries into the buffer */
|
||||
@ -902,7 +902,7 @@ int orte_grpcomm_base_update_modex_entries(orte_process_name_t *proc_name,
|
||||
if (proc_data == NULL) {
|
||||
/* report the error */
|
||||
opal_output(0, "grpcomm:base:update_modex: received modex info for unknown proc %s\n",
|
||||
ORTE_NAME_PRINT(proc_name));
|
||||
orte_util_print_name_args(proc_name));
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
@ -917,8 +917,8 @@ int orte_grpcomm_base_update_modex_entries(orte_process_name_t *proc_name,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:base:update_modex_entries: adding %d entries for proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_recvd_entries,
|
||||
ORTE_NAME_PRINT(proc_name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), num_recvd_entries,
|
||||
orte_util_print_name_args(proc_name)));
|
||||
|
||||
/*
|
||||
* Extract the attribute names and values
|
||||
@ -986,15 +986,15 @@ int orte_grpcomm_base_load_modex_data(orte_process_name_t *proc_name, char *attr
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:base:load_modex_data: loading %ld bytes for attr %s on proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(long)num_bytes, attr_name, ORTE_NAME_PRINT(proc_name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(long)num_bytes, attr_name, orte_util_print_name_args(proc_name)));
|
||||
|
||||
/* look up the modex data structure */
|
||||
proc_data = modex_lookup_orte_proc(proc_name);
|
||||
if (proc_data == NULL) {
|
||||
/* report the error */
|
||||
opal_output(0, "grpcomm:base:update_modex: received modex info for unknown proc %s\n",
|
||||
ORTE_NAME_PRINT(proc_name));
|
||||
orte_util_print_name_args(proc_name));
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
|
@ -212,8 +212,8 @@ static int xcast(orte_jobid_t job,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:xcast sent to job %s tag %ld",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(job), (long)tag));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(job), (long)tag));
|
||||
|
||||
/* if there is no message to send, then just return ok */
|
||||
if (NULL == buffer) {
|
||||
@ -318,7 +318,7 @@ static int barrier(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:basic entering barrier",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* everyone sends barrier to local daemon */
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
@ -338,7 +338,7 @@ static int barrier(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
|
||||
"%s grpcomm:basic barrier sent",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* now receive the release. Be sure to do this in
|
||||
* a manner that allows us to return without being in a recv!
|
||||
@ -355,7 +355,7 @@ static int barrier(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
|
||||
"%s grpcomm:basic received barrier release",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
@ -383,7 +383,7 @@ static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:basic entering allgather",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* everyone sends data to their local daemon */
|
||||
OBJ_CONSTRUCT(&coll, opal_buffer_t);
|
||||
@ -409,7 +409,7 @@ static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
|
||||
"%s grpcomm:basic allgather buffer sent",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* setup the buffer that will recv the results */
|
||||
allgather_buf = OBJ_NEW(opal_buffer_t);
|
||||
@ -437,7 +437,7 @@ static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:basic allgather completed",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
@ -455,7 +455,7 @@ static int modex(opal_list_t *procs)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:basic: modex entered",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* if we were given a list of procs to modex with, then this is happening
|
||||
* as part of a connect/accept operation. In this case, we -must- do the
|
||||
@ -508,7 +508,7 @@ static int modex(opal_list_t *procs)
|
||||
if (orte_hetero_apps || !orte_homogeneous_nodes) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:basic: modex is required",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_grpcomm_base_peer_modex(false))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -522,7 +522,7 @@ static int modex(opal_list_t *procs)
|
||||
/* the info was provided in the nidmap - there is nothing more we have to do */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:basic:modex using nidmap",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
@ -533,7 +533,7 @@ static int modex(opal_list_t *procs)
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:basic: modex is required",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
if (ORTE_SUCCESS != (rc = orte_grpcomm_base_peer_modex(false))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
@ -548,7 +548,7 @@ static int modex(opal_list_t *procs)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:basic:modex reading %s file",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), opal_profile_file));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), opal_profile_file));
|
||||
|
||||
/* loop through file until end */
|
||||
boptr = &bo;
|
||||
@ -612,7 +612,7 @@ static int modex(opal_list_t *procs)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:basic: modex completed",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
return rc;
|
||||
}
|
||||
@ -624,7 +624,7 @@ static int set_proc_attr(const char *attr_name, const void *data, size_t size)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:basic:set_proc_attr for attribute %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), attr_name));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), attr_name));
|
||||
|
||||
/* if we are doing a profile, pack this up */
|
||||
if (opal_profile) {
|
||||
@ -667,8 +667,8 @@ static int get_proc_attr(const orte_process_name_t proc,
|
||||
/* proc wasn't found - return error */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:basic:get_proc_attr: no modex entry for proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&proc)));
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
|
||||
}
|
||||
@ -690,8 +690,8 @@ static int get_proc_attr(const orte_process_name_t proc,
|
||||
*size = attr->size;
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:basic:get_proc_attr: found %d bytes for attr %s on proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)attr->size,
|
||||
attribute_name, ORTE_NAME_PRINT(&proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), (int)attr->size,
|
||||
attribute_name, orte_util_print_name_args(&proc)));
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
}
|
||||
@ -699,8 +699,8 @@ static int get_proc_attr(const orte_process_name_t proc,
|
||||
/* get here if attribute isn't found */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:basic:get_proc_attr: no attr avail or zero byte size for proc %s attribute %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&proc), attribute_name));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&proc), attribute_name));
|
||||
*val = NULL;
|
||||
*size = 0;
|
||||
|
||||
@ -726,8 +726,8 @@ static void process_msg(int fd, short event, void *data)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:basic:receive:profile writing %d bytes of data from proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
bo->size, ORTE_NAME_PRINT(&mev->sender)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
bo->size, orte_util_print_name_args(&mev->sender)));
|
||||
|
||||
write(profile_fd, &bo->size, sizeof(bo->size));
|
||||
write(profile_fd, bo->bytes, bo->size);
|
||||
@ -753,8 +753,8 @@ static void profile_recv(int status, orte_process_name_t* sender,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:basic:receive got message from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(sender)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(sender)));
|
||||
|
||||
/* don't process this right away - we need to get out of the recv before
|
||||
* we process the message as it may ask us to do something that involves
|
||||
@ -818,7 +818,7 @@ static int daemon_collective(orte_process_name_t *sender, opal_buffer_t *data)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s odls: daemon collective called",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* unpack the jobid using this collective */
|
||||
n = 1;
|
||||
@ -923,8 +923,8 @@ static int daemon_collective(orte_process_name_t *sender, opal_buffer_t *data)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad: daemon collective for job %s from %s type %ld num_collected %d num_participating %d num_contributors %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jobid),
|
||||
ORTE_NAME_PRINT(sender),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), orte_util_print_jobids(jobid),
|
||||
orte_util_print_name_args(sender),
|
||||
(long)jobdat->collective_type, jobdat->num_collected,
|
||||
jobdat->num_participating, jobdat->num_contributors));
|
||||
|
||||
@ -963,8 +963,8 @@ static int daemon_collective(orte_process_name_t *sender, opal_buffer_t *data)
|
||||
my_parent.vpid = orte_routed.get_routing_tree(NULL);
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad: daemon collective not the HNP - sending to parent %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&my_parent)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&my_parent)));
|
||||
if (0 > (rc = orte_rml.send_buffer(&my_parent, &buf, ORTE_RML_TAG_DAEMON_COLLECTIVE, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
@ -976,8 +976,8 @@ static int daemon_collective(orte_process_name_t *sender, opal_buffer_t *data)
|
||||
hnp_process:
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad: daemon collective HNP - xcasting to job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jobid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jobid)));
|
||||
/* setup a buffer to send the results back to the job members */
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
|
||||
@ -1076,8 +1076,8 @@ static void process_coll_msg(int fd, short event, void *data)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad: collecting data from child %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(child->name)));
|
||||
|
||||
found = true;
|
||||
break;
|
||||
@ -1138,7 +1138,7 @@ static void process_coll_msg(int fd, short event, void *data)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad: executing collective",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* prep a buffer to pass it all along */
|
||||
OBJ_CONSTRUCT(&relay, opal_buffer_t);
|
||||
@ -1168,7 +1168,7 @@ static void process_coll_msg(int fd, short event, void *data)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad: collective completed",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
}
|
||||
|
||||
CLEANUP:
|
||||
@ -1184,8 +1184,8 @@ static void daemon_coll_recv(int status, orte_process_name_t* sender,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:bad:receive got message from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(sender)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(sender)));
|
||||
|
||||
/* don't process this right away - we need to get out of the recv before
|
||||
* we process the message as it may ask us to do something that involves
|
||||
|
@ -138,8 +138,8 @@ static int xcast(orte_jobid_t job,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:xcast sent to job %s tag %ld",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(job), (long)tag));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(job), (long)tag));
|
||||
|
||||
/* if there is no message to send, then just return ok */
|
||||
if (NULL == buffer) {
|
||||
@ -234,7 +234,7 @@ static int barrier(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:hier entering barrier",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
OBJ_CONSTRUCT(&buf1, opal_buffer_t);
|
||||
OBJ_CONSTRUCT(&buf2, opal_buffer_t);
|
||||
@ -247,7 +247,7 @@ static int barrier(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:hier barrier complete",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
return rc;
|
||||
}
|
||||
@ -302,7 +302,7 @@ static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:hier entering allgather",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* have I initialized my local info? */
|
||||
if (!coll_initialized) {
|
||||
@ -436,7 +436,7 @@ static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:hier allgather completed",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
@ -456,7 +456,7 @@ static int modex(opal_list_t *procs)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:hier: modex entered",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* if we were given a list of procs to modex with, then this is happening
|
||||
* as part of a connect/accept operation. In this case, we -must- do the
|
||||
@ -509,7 +509,7 @@ static int modex(opal_list_t *procs)
|
||||
if (orte_hetero_apps || !orte_homogeneous_nodes) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:hier: modex is required",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_grpcomm_base_peer_modex(false))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -525,7 +525,7 @@ static int modex(opal_list_t *procs)
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:hier: modex is required",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
if (ORTE_SUCCESS != (rc = orte_grpcomm_base_peer_modex(false))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
@ -540,7 +540,7 @@ static int modex(opal_list_t *procs)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:hier:modex reading %s file",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), opal_profile_file));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), opal_profile_file));
|
||||
|
||||
/* loop through file until end */
|
||||
boptr = &bo;
|
||||
@ -604,7 +604,7 @@ static int modex(opal_list_t *procs)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:hier: modex completed",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
return rc;
|
||||
}
|
||||
@ -614,7 +614,7 @@ static int set_proc_attr(const char *attr_name, const void *data, size_t size)
|
||||
{
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
|
||||
"%s grpcomm:hier:set_proc_attr for attribute %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), attr_name));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), attr_name));
|
||||
|
||||
/* we always have to set our own attributes in case they are needed for
|
||||
* a connect/accept at some later time
|
||||
@ -635,8 +635,8 @@ static int get_proc_attr(const orte_process_name_t proc,
|
||||
/* proc wasn't found - return error */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:hier:get_proc_attr: no modex entry for proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&proc)));
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
|
||||
}
|
||||
@ -658,8 +658,8 @@ static int get_proc_attr(const orte_process_name_t proc,
|
||||
*size = attr->size;
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:hier:get_proc_attr: found %d bytes for attr %s on proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)attr->size,
|
||||
attribute_name, ORTE_NAME_PRINT(&proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), (int)attr->size,
|
||||
attribute_name, orte_util_print_name_args(&proc)));
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
}
|
||||
@ -667,8 +667,8 @@ static int get_proc_attr(const orte_process_name_t proc,
|
||||
/* get here if attribute isn't found */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_output,
|
||||
"%s grpcomm:hier:get_proc_attr: no attr avail or zero byte size for proc %s attribute %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&proc), attribute_name));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&proc), attribute_name));
|
||||
*val = NULL;
|
||||
*size = 0;
|
||||
|
||||
|
@ -162,8 +162,8 @@ typedef struct orte_iof_base_t orte_iof_base_t;
|
||||
orte_iof_read_event_t *rev; \
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, \
|
||||
"%s defining read event for %s: %s %d", \
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
|
||||
ORTE_NAME_PRINT((nm)), \
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), \
|
||||
orte_util_print_name_args((nm)), \
|
||||
__FILE__, __LINE__)); \
|
||||
rev = OBJ_NEW(orte_iof_read_event_t); \
|
||||
*(rv) = rev; \
|
||||
|
@ -94,8 +94,8 @@ static void orte_iof_base_sink_destruct(orte_iof_sink_t* ptr)
|
||||
{
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_iof_base.iof_output,
|
||||
"%s iof: closing sink for process %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&ptr->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&ptr->name)));
|
||||
if (NULL != ptr->wev) {
|
||||
OBJ_RELEASE(ptr->wev);
|
||||
}
|
||||
@ -116,8 +116,8 @@ static void orte_iof_base_read_event_destruct(orte_iof_read_event_t* rev)
|
||||
if (0 <= rev->ev.ev_fd) {
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_iof_base.iof_output,
|
||||
"%s iof: closing fd %d for process %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
rev->ev.ev_fd, ORTE_NAME_PRINT(&rev->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
rev->ev.ev_fd, orte_util_print_name_args(&rev->name)));
|
||||
close(rev->ev.ev_fd);
|
||||
}
|
||||
}
|
||||
@ -140,7 +140,7 @@ static void orte_iof_base_write_event_destruct(orte_iof_write_event_t* wev)
|
||||
if (2 < wev->fd) {
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_iof_base.iof_output,
|
||||
"%s iof: closing fd %d for write event",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), wev->fd));
|
||||
close(wev->fd);
|
||||
}
|
||||
OBJ_DESTRUCT(&wev->outputs);
|
||||
|
@ -51,9 +51,9 @@ int orte_iof_base_write_output(orte_process_name_t *name, orte_iof_tag_t stream,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s write:output setting up to write %d bytes to %s for %s on fd %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), numbytes,
|
||||
(ORTE_IOF_STDIN & stream) ? "stdin" : ((ORTE_IOF_STDOUT & stream) ? "stdout" : ((ORTE_IOF_STDERR & stream) ? "stderr" : "stddiag")),
|
||||
ORTE_NAME_PRINT(name), channel->fd));
|
||||
orte_util_print_name_args(name), channel->fd));
|
||||
|
||||
/* setup output object */
|
||||
output = OBJ_NEW(orte_iof_write_output_t);
|
||||
@ -83,7 +83,7 @@ int orte_iof_base_write_output(orte_process_name_t *name, orte_iof_tag_t stream,
|
||||
/* error - this should never happen */
|
||||
ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s stream %0x", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), stream));
|
||||
"%s stream %0x", orte_util_print_name_args(ORTE_PROC_MY_NAME), stream));
|
||||
return ORTE_ERR_VALUE_OUT_OF_BOUNDS;
|
||||
}
|
||||
|
||||
@ -91,7 +91,7 @@ int orte_iof_base_write_output(orte_process_name_t *name, orte_iof_tag_t stream,
|
||||
* timestamping of xml output
|
||||
*/
|
||||
if (orte_xml_output) {
|
||||
snprintf(starttag, ORTE_IOF_BASE_TAG_MAX, "<%s rank=\"%s\">", suffix, ORTE_VPID_PRINT(name->vpid));
|
||||
snprintf(starttag, ORTE_IOF_BASE_TAG_MAX, "<%s rank=\"%s\">", suffix, orte_util_print_vpids(name->vpid));
|
||||
snprintf(endtag, ORTE_IOF_BASE_TAG_MAX, "</%s>", suffix);
|
||||
goto construct;
|
||||
}
|
||||
@ -108,8 +108,8 @@ int orte_iof_base_write_output(orte_process_name_t *name, orte_iof_tag_t stream,
|
||||
if (orte_tag_output) {
|
||||
/* if we want it tagged as well, use both */
|
||||
snprintf(starttag, ORTE_IOF_BASE_TAG_MAX, "%s[%s,%s]<%s>:",
|
||||
cptr, ORTE_LOCAL_JOBID_PRINT(name->jobid),
|
||||
ORTE_VPID_PRINT(name->vpid), suffix);
|
||||
cptr, orte_util_print_local_jobid(name->jobid),
|
||||
orte_util_print_vpids(name->vpid), suffix);
|
||||
} else {
|
||||
/* only use timestamp */
|
||||
snprintf(starttag, ORTE_IOF_BASE_TAG_MAX, "%s<%s>:", cptr, suffix);
|
||||
@ -121,8 +121,8 @@ int orte_iof_base_write_output(orte_process_name_t *name, orte_iof_tag_t stream,
|
||||
|
||||
if (orte_tag_output) {
|
||||
snprintf(starttag, ORTE_IOF_BASE_TAG_MAX, "[%s,%s]<%s>:",
|
||||
ORTE_LOCAL_JOBID_PRINT(name->jobid),
|
||||
ORTE_VPID_PRINT(name->vpid), suffix);
|
||||
orte_util_print_local_jobid(name->jobid),
|
||||
orte_util_print_vpids(name->vpid), suffix);
|
||||
/* no endtag for this option */
|
||||
memset(endtag, '\0', ORTE_IOF_BASE_TAG_MAX);
|
||||
goto construct;
|
||||
@ -186,7 +186,7 @@ process:
|
||||
/* issue it */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s write:output adding write event",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
opal_event_add(&channel->ev, 0);
|
||||
channel->pending = true;
|
||||
}
|
||||
@ -207,7 +207,7 @@ void orte_iof_base_write_handler(int fd, short event, void *cbdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s write:handler writing data to %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
wev->fd));
|
||||
|
||||
/* lock us up to protect global operations */
|
||||
|
@ -113,8 +113,8 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s iof:hnp pushing fd %d for process %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
fd, ORTE_NAME_PRINT(dst_name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
fd, orte_util_print_name_args(dst_name)));
|
||||
|
||||
if (!(src_tag & ORTE_IOF_STDIN)) {
|
||||
/* set the file descriptor to non-blocking - do this before we setup
|
||||
@ -318,8 +318,8 @@ static int hnp_pull(const orte_process_name_t* dst_name,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s iof:hnp pulling fd %d for process %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
fd, ORTE_NAME_PRINT(dst_name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
fd, orte_util_print_name_args(dst_name)));
|
||||
|
||||
/* set the file descriptor to non-blocking - do this before we setup
|
||||
* the sink in case it fires right away
|
||||
@ -391,7 +391,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s hnp:stdin:write:handler writing data to %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
wev->fd));
|
||||
|
||||
/* lock us up to protect global operations */
|
||||
@ -413,7 +413,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata)
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_iof_base.iof_output,
|
||||
"%s iof:hnp closing fd %d on write event due to zero bytes output",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), wev->fd));
|
||||
OBJ_RELEASE(wev);
|
||||
sink->wev = NULL;
|
||||
/* just leave - we don't want to restart the
|
||||
@ -424,7 +424,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata)
|
||||
num_written = write(wev->fd, output->data, output->numbytes);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s hnp:stdin:write:handler wrote %d bytes",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
num_written));
|
||||
if (num_written < 0) {
|
||||
if (EAGAIN == errno || EINTR == errno) {
|
||||
@ -443,14 +443,14 @@ static void stdin_write_handler(int fd, short event, void *cbdata)
|
||||
OBJ_RELEASE(output);
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_iof_base.iof_output,
|
||||
"%s iof:hnp closing fd %d on write event due to negative bytes written",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), wev->fd));
|
||||
OBJ_RELEASE(wev);
|
||||
sink->wev = NULL;
|
||||
goto DEPART;
|
||||
} else if (num_written < output->numbytes) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s hnp:stdin:write:handler incomplete write %d - adjusting data",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_written));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), num_written));
|
||||
/* incomplete write - adjust data to avoid duplicate output */
|
||||
memmove(output->data, &output->data[num_written], output->numbytes - num_written);
|
||||
/* push this item back on the front of the list */
|
||||
|
@ -180,7 +180,7 @@ orte_iof_hnp_exception_handler(const orte_process_name_t* peer, orte_rml_excepti
|
||||
orte_iof_base_endpoint_t *endpoint;
|
||||
opal_output_verbose(1, orte_iof_base.iof_output,
|
||||
"iof svc exception handler! %s\n",
|
||||
ORTE_NAME_PRINT((orte_process_name_t*)peer));
|
||||
orte_util_print_name_args((orte_process_name_t*)peer));
|
||||
|
||||
/* If we detect an exception on the RML connection to a peer,
|
||||
delete all of its subscriptions and publications. Note that
|
||||
|
@ -114,8 +114,8 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s iof:hnp:read handler %s Error on connection:%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&rev->name), fd));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&rev->name), fd));
|
||||
/* Un-recoverable error. Allow the code to flow as usual in order to
|
||||
* to send the zero bytes message up the stream, and then close the
|
||||
* file descriptor and delete the event.
|
||||
@ -149,8 +149,8 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata)
|
||||
ORTE_PROC_MY_NAME->vpid == sink->daemon.vpid) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s read %d bytes from stdin - writing to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
|
||||
ORTE_NAME_PRINT(&rev->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), numbytes,
|
||||
orte_util_print_name_args(&rev->name)));
|
||||
/* send the bytes down the pipe - we even send 0 byte events
|
||||
* down the pipe so it forces out any preceding data before
|
||||
* closing the output stream
|
||||
@ -170,8 +170,8 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata)
|
||||
} else {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s sending %d bytes from stdin to daemon %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
|
||||
ORTE_NAME_PRINT(&sink->daemon)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), numbytes,
|
||||
orte_util_print_name_args(&sink->daemon)));
|
||||
|
||||
/* send the data to the daemon so it can
|
||||
* write it to the proc's fd - in this case,
|
||||
@ -226,17 +226,17 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata)
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s sending data to tool %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&sink->daemon)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&sink->daemon)));
|
||||
orte_iof_hnp_send_data_to_endpoint(&sink->daemon, &rev->name, rev->tag, data, numbytes);
|
||||
}
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s read %d bytes from %s of %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), numbytes,
|
||||
(ORTE_IOF_STDOUT & rev->tag) ? "stdout" : ((ORTE_IOF_STDERR & rev->tag) ? "stderr" : "stddiag"),
|
||||
ORTE_NAME_PRINT(&rev->name)));
|
||||
orte_util_print_name_args(&rev->name)));
|
||||
|
||||
if (0 == numbytes) {
|
||||
/* if we read 0 bytes from the stdout/err/diag, there is
|
||||
|
@ -97,9 +97,9 @@ static void process_msg(int fd, short event, void *cbdata)
|
||||
if (ORTE_IOF_PULL & stream) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s received pull cmd from remote tool %s for proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&mev->sender),
|
||||
ORTE_NAME_PRINT(&origin)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&mev->sender),
|
||||
orte_util_print_name_args(&origin)));
|
||||
/* a tool is requesting that we send it a copy of the specified stream(s)
|
||||
* from the specified process(es), so create a sink for it
|
||||
*/
|
||||
@ -127,9 +127,9 @@ static void process_msg(int fd, short event, void *cbdata)
|
||||
if (ORTE_IOF_CLOSE & stream) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s received close cmd from remote tool %s for proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&mev->sender),
|
||||
ORTE_NAME_PRINT(&origin)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&mev->sender),
|
||||
orte_util_print_name_args(&origin)));
|
||||
/* a tool is requesting that we no longer forward a copy of the
|
||||
* specified stream(s) from the specified process(es) - remove the sink
|
||||
*/
|
||||
@ -169,8 +169,8 @@ static void process_msg(int fd, short event, void *cbdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s unpacked %d bytes from remote proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
|
||||
ORTE_NAME_PRINT(&origin)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), numbytes,
|
||||
orte_util_print_name_args(&origin)));
|
||||
|
||||
/* output this to our local output */
|
||||
if (ORTE_IOF_STDOUT & stream) {
|
||||
@ -212,8 +212,8 @@ void orte_iof_hnp_recv(int status, orte_process_name_t* sender,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_iof_base.iof_output,
|
||||
"%s iof:hnp:receive got message from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(sender)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(sender)));
|
||||
|
||||
/* don't process this right away - we need to get out of the recv before
|
||||
* we process the message to avoid performing the rest of the job while
|
||||
|
@ -99,8 +99,8 @@ static int orted_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_ta
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s iof:orted pushing fd %d for process %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
fd, ORTE_NAME_PRINT(dst_name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
fd, orte_util_print_name_args(dst_name)));
|
||||
|
||||
/* set the file descriptor to non-blocking - do this before we setup
|
||||
* and activate the read event in case it fires right away
|
||||
@ -219,8 +219,8 @@ static int orted_pull(const orte_process_name_t* dst_name,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s iof:orted pulling fd %d for process %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
fd, ORTE_NAME_PRINT(dst_name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
fd, orte_util_print_name_args(dst_name)));
|
||||
|
||||
/* set the file descriptor to non-blocking - do this before we setup
|
||||
* the sink in case it fires right away
|
||||
@ -298,7 +298,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s orted:stdin:write:handler writing data to %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
wev->fd));
|
||||
|
||||
/* lock us up to protect global operations */
|
||||
@ -313,7 +313,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata)
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_iof_base.iof_output,
|
||||
"%s iof:orted closing fd %d on write event due to zero bytes output",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), wev->fd));
|
||||
OBJ_RELEASE(wev);
|
||||
sink->wev = NULL;
|
||||
goto DEPART;
|
||||
@ -321,7 +321,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata)
|
||||
num_written = write(wev->fd, output->data, output->numbytes);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s orted:stdin:write:handler wrote %d bytes",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
num_written));
|
||||
if (num_written < 0) {
|
||||
if (EAGAIN == errno || EINTR == errno) {
|
||||
@ -340,7 +340,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata)
|
||||
OBJ_RELEASE(output);
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_iof_base.iof_output,
|
||||
"%s iof:orted closing fd %d on write event due to negative bytes written",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), wev->fd));
|
||||
OBJ_RELEASE(wev);
|
||||
sink->wev = NULL;
|
||||
/* tell the HNP to stop sending us stuff */
|
||||
@ -352,7 +352,7 @@ static void stdin_write_handler(int fd, short event, void *cbdata)
|
||||
} else if (num_written < output->numbytes) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s orted:stdin:write:handler incomplete write %d - adjusting data",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_written));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), num_written));
|
||||
/* incomplete write - adjust data to avoid duplicate output */
|
||||
memmove(output->data, &output->data[num_written], output->numbytes - num_written);
|
||||
/* push this item back on the front of the list */
|
||||
|
@ -81,8 +81,8 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s iof:orted:read handler read %d bytes from %s, fd %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
numbytes, ORTE_NAME_PRINT(&rev->name), fd));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
numbytes, orte_util_print_name_args(&rev->name), fd));
|
||||
|
||||
if (numbytes <= 0) {
|
||||
if (0 > numbytes) {
|
||||
@ -96,8 +96,8 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s iof:orted:read handler %s Error on connection:%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&rev->name), fd));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&rev->name), fd));
|
||||
}
|
||||
/* numbytes must have been zero, so go down and close the fd etc */
|
||||
goto CLEAN_RETURN;
|
||||
@ -156,7 +156,7 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata)
|
||||
/* start non-blocking RML call to forward received data */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s iof:orted:read handler sending %d bytes to HNP",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), numbytes));
|
||||
|
||||
orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
|
||||
0, send_cb, NULL);
|
||||
|
@ -66,7 +66,7 @@ void orte_iof_orted_send_xonxoff(orte_iof_tag_t tag)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s sending %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(ORTE_IOF_XON == tag) ? "xon" : "xoff"));
|
||||
|
||||
/* send the buffer to the HNP */
|
||||
@ -122,8 +122,8 @@ static void process_msg(int fd, short event, void *cbdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s unpacked %d bytes for local proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
|
||||
ORTE_NAME_PRINT(&target)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), numbytes,
|
||||
orte_util_print_name_args(&target)));
|
||||
|
||||
/* cycle through our list of sinks */
|
||||
for (item = opal_list_get_first(&mca_iof_orted_component.sinks);
|
||||
@ -138,8 +138,8 @@ static void process_msg(int fd, short event, void *cbdata)
|
||||
sink->name.vpid == target.vpid) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s writing data to local proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&sink->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&sink->name)));
|
||||
if (NULL == sink->wev || sink->wev->fd < 0) {
|
||||
/* this sink was already closed - ignore this data */
|
||||
goto CLEAN_RETURN;
|
||||
@ -176,8 +176,8 @@ void orte_iof_orted_recv(int status, orte_process_name_t* sender,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s iof:orted:receive got message from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(sender)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(sender)));
|
||||
|
||||
/* don't process this right away - we need to get out of the recv before
|
||||
* we process the message to avoid performing the rest of the job while
|
||||
|
@ -114,8 +114,8 @@ static int tool_pull(const orte_process_name_t* src_name,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s pulling output for proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(src_name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(src_name)));
|
||||
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
|
||||
@ -160,8 +160,8 @@ static int tool_close(const orte_process_name_t* src_name,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s closing output for proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(src_name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(src_name)));
|
||||
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
|
||||
|
@ -64,8 +64,8 @@ static void process_msg(int fd, short event, void *cbdata)
|
||||
if (ORTE_IOF_CLOSE & stream) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s received CLOSE handshake from remote hnp %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&mev->sender)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&mev->sender)));
|
||||
mca_iof_tool_component.closed = true;
|
||||
goto CLEAN_RETURN;
|
||||
}
|
||||
@ -87,8 +87,8 @@ static void process_msg(int fd, short event, void *cbdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s unpacked %d bytes from remote proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
|
||||
ORTE_NAME_PRINT(&origin)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), numbytes,
|
||||
orte_util_print_name_args(&origin)));
|
||||
|
||||
/* if numbytes is zero, it means that the channel was closed on the far end - for
|
||||
* now, we just ignore this condition
|
||||
@ -116,8 +116,8 @@ void orte_iof_tool_recv(int status, orte_process_name_t* sender,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_iof_base.iof_output,
|
||||
"%s iof:tool:receive got message from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(sender)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(sender)));
|
||||
|
||||
/* don't process this right away - we need to get out of the recv before
|
||||
* we process the message to avoid performing the rest of the job while
|
||||
|
@ -110,7 +110,7 @@ static void mypeerlog(int severity, int errcode, orte_process_name_t *peer_proc,
|
||||
|
||||
if (peer_proc) {
|
||||
peer_host = orte_ess.proc_get_hostname(peer_proc);
|
||||
peer_name = ORTE_NAME_PRINT(peer_proc);
|
||||
peer_name = orte_util_print_name_args(peer_proc);
|
||||
}
|
||||
|
||||
len = snprintf(pos, space,
|
||||
@ -118,7 +118,7 @@ static void mypeerlog(int severity, int errcode, orte_process_name_t *peer_proc,
|
||||
" proc %s on node %s encountered an error ",
|
||||
peer_name ? peer_name : "UNKNOWN",
|
||||
peer_host ? peer_host : "UNKNOWN",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_proc_info.nodename);
|
||||
space -= len;
|
||||
pos += len;
|
||||
|
@ -329,7 +329,7 @@ int orte_odls_base_default_update_daemon_info(opal_buffer_t *data)
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:update:daemon:info updating nidmap",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_ess.update_nidmap(bo))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -390,7 +390,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:constructing child list",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* unpack the returned data to create the required structures
|
||||
* for a fork launch. Since the data will contain information
|
||||
@ -410,7 +410,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
|
||||
if (0 != flag) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:construct_child_list unpacking debugger daemon",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
/* yep - create a jobdat object for it. In this case, we don't have to
|
||||
* worry about race conditions as the debugger daemons do not use
|
||||
* the daemon collective system
|
||||
@ -453,7 +453,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:construct_child_list unpacking data to launch job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(*job)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), orte_util_print_jobids(*job)));
|
||||
|
||||
/* even though we are unpacking an add_local_procs cmd, we cannot assume
|
||||
* that no job record for this jobid exists. A race condition exists that
|
||||
@ -470,7 +470,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
|
||||
if (jdat->jobid == *job) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:construct_child_list found existing jobdat for job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(*job)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), orte_util_print_jobids(*job)));
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -478,7 +478,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
|
||||
/* setup jobdat object for this job */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:construct_child_list adding new jobdat for job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(*job)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), orte_util_print_jobids(*job)));
|
||||
jobdat = OBJ_NEW(orte_odls_job_t);
|
||||
jobdat->jobid = *job;
|
||||
opal_list_append(&orte_local_jobdata, &jobdat->super);
|
||||
@ -529,7 +529,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:construct_child_list unpacking %ld app_contexts",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)jobdat->num_apps));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), (long)jobdat->num_apps));
|
||||
|
||||
/* allocate space and unpack the app_contexts for this job - the HNP checked
|
||||
* that there must be at least one, so don't bother checking here again
|
||||
@ -599,15 +599,15 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:constructing child list - checking proc %s on daemon %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_VPID_PRINT(j),
|
||||
ORTE_VPID_PRINT(host_daemon)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), orte_util_print_vpids(j),
|
||||
orte_util_print_vpids(host_daemon)));
|
||||
|
||||
/* does this proc belong to us? */
|
||||
if (ORTE_PROC_MY_NAME->vpid == host_daemon) {
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:constructing child list - found proc %s for me!",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_VPID_PRINT(j)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), orte_util_print_vpids(j)));
|
||||
|
||||
/* keep tabs of the number of local procs */
|
||||
jobdat->num_local_procs++;
|
||||
@ -708,7 +708,7 @@ static int odls_base_default_setup_fork(orte_app_context_t *context,
|
||||
if (opal_sys_limits.initialized) {
|
||||
OPAL_OUTPUT_VERBOSE((10, orte_odls_globals.output,
|
||||
"%s limit on num procs %d num children %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
opal_sys_limits.num_procs,
|
||||
(int)opal_list_get_size(&orte_local_children)));
|
||||
if (0 < opal_sys_limits.num_procs &&
|
||||
@ -1008,7 +1008,7 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:launch found %d processors for %d children and set oversubscribed to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
num_processors, (int)opal_list_get_size(&orte_local_children),
|
||||
oversubscribed ? "true" : "false"));
|
||||
|
||||
@ -1034,7 +1034,7 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((10, orte_odls_globals.output,
|
||||
"%s odls:launch:setup_fork failed with error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
ORTE_ERROR_NAME(rc)));
|
||||
|
||||
/* do not ERROR_LOG this failure - it will be reported
|
||||
@ -1163,8 +1163,8 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:launch child %s is already alive",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(child->name)));
|
||||
|
||||
continue;
|
||||
}
|
||||
@ -1177,9 +1177,9 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:launch child %s is not in job %s being launched",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name),
|
||||
ORTE_JOBID_PRINT(job)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(child->name),
|
||||
orte_util_print_jobids(job)));
|
||||
|
||||
continue;
|
||||
}
|
||||
@ -1210,7 +1210,7 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
|
||||
}
|
||||
/* insert the rank into the correct place as a window title */
|
||||
free(app->argv[2]);
|
||||
asprintf(&app->argv[2], "Rank %s", ORTE_VPID_PRINT(child->name->vpid));
|
||||
asprintf(&app->argv[2], "Rank %s", orte_util_print_vpids(child->name->vpid));
|
||||
/* add back the original argv */
|
||||
for (inm=0; inm < opal_argv_count(argvsav); inm++) {
|
||||
opal_argv_append_nosize(&app->argv, argvsav[inm]);
|
||||
@ -1334,8 +1334,8 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
|
||||
#endif
|
||||
if (5 < opal_output_get_verbosity(orte_odls_globals.output)) {
|
||||
opal_output(orte_odls_globals.output, "%s odls:launch: spawning child %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(child->name));
|
||||
|
||||
/* dump what is going to be exec'd */
|
||||
if (7 < opal_output_get_verbosity(orte_odls_globals.output)) {
|
||||
@ -1385,8 +1385,8 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
|
||||
CLEANUP:
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:launch reporting job %s launch status",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(job)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(job)));
|
||||
/* pack the launch results */
|
||||
if (ORTE_SUCCESS != (ret = pack_state_update(&alert, true, jobdat))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
@ -1406,7 +1406,7 @@ CLEANUP:
|
||||
0 < opal_list_get_size(&orte_local_children)) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:launch forking debugger with %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(ORTE_JOB_CONTROL_FORWARD_OUTPUT & orte_odls_globals.debugger->controls) ? "output forwarded" : "no output"));
|
||||
|
||||
fork_local(orte_odls_globals.debugger->apps[0], NULL, NULL,
|
||||
@ -1434,7 +1434,7 @@ CLEANUP:
|
||||
if (!launch_failed) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:launch setting waitpids",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* if the launch didn't fail, setup the waitpids on the children */
|
||||
for (item = opal_list_get_first(&orte_local_children);
|
||||
@ -1482,8 +1482,8 @@ int orte_odls_base_default_deliver_message(orte_jobid_t job, opal_buffer_t *buff
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls: sending message to tag %lu on child %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(unsigned long)tag, ORTE_NAME_PRINT(child->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(unsigned long)tag, orte_util_print_name_args(child->name)));
|
||||
|
||||
/* if so, send the message */
|
||||
rc = orte_rml.send_buffer(child->name, buffer, tag, 0);
|
||||
@ -1515,8 +1515,8 @@ int orte_odls_base_default_signal_local_procs(const orte_process_name_t *proc, i
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls: signaling proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == proc) ? "NULL" : ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(NULL == proc) ? "NULL" : orte_util_print_name_args(proc)));
|
||||
|
||||
/* protect operations involving the global list of children */
|
||||
OPAL_THREAD_LOCK(&orte_odls_globals.mutex);
|
||||
@ -1707,8 +1707,8 @@ int orte_odls_base_default_require_sync(orte_process_name_t *proc,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls: registering sync on child %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(child->name)));
|
||||
|
||||
found = true;
|
||||
break;
|
||||
@ -1769,8 +1769,8 @@ int orte_odls_base_default_require_sync(orte_process_name_t *proc,
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:sync nidmap requested for job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jobdat->jobid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jobdat->jobid)));
|
||||
/* the proc needs a copy of both the daemon/node map, and
|
||||
* the process map for its peers
|
||||
*/
|
||||
@ -1780,8 +1780,8 @@ int orte_odls_base_default_require_sync(orte_process_name_t *proc,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls: sending sync ack to child %s with %ld bytes of data",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc), (long)buffer.bytes_used));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc), (long)buffer.bytes_used));
|
||||
|
||||
if (0 > (rc = orte_rml.send_buffer(proc, &buffer, ORTE_RML_TAG_SYNC, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -1799,7 +1799,7 @@ int orte_odls_base_default_require_sync(orte_process_name_t *proc,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls: sending contact info to HNP",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
OBJ_CONSTRUCT(&buffer, opal_buffer_t);
|
||||
/* store jobid */
|
||||
@ -1917,8 +1917,8 @@ static void check_proc_complete(orte_odls_child_t *child)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:proc_complete reporting proc %s aborted to HNP",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(child->name)));
|
||||
|
||||
/* if we are the HNP, then we would rather not send this to ourselves -
|
||||
* instead, we queue it up for local processing
|
||||
@ -1966,8 +1966,8 @@ static void check_proc_complete(orte_odls_child_t *child)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:proc_complete reporting all procs in %s terminated",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdat->jobid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdat->jobid)));
|
||||
|
||||
/* remove all of this job's children from the global list - do not lock
|
||||
* the thread as we are already locked
|
||||
@ -2020,8 +2020,8 @@ void orte_odls_base_notify_iof_complete(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:notify_iof_complete for child %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
|
||||
/* since we are going to be working with the global list of
|
||||
* children, we need to protect that list from modification
|
||||
@ -2048,8 +2048,8 @@ void orte_odls_base_notify_iof_complete(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:proc_complete did not find child %s in table!",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
|
||||
/* it's just a race condition - don't error log it */
|
||||
opal_condition_signal(&orte_odls_globals.cond);
|
||||
@ -2098,8 +2098,8 @@ void orte_base_default_waitpid_fired(orte_process_name_t *proc, int32_t status)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:waitpid_fired did not find child %s in table!",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
|
||||
/* it's just a race condition - don't error log it */
|
||||
opal_condition_signal(&orte_odls_globals.cond);
|
||||
@ -2113,8 +2113,8 @@ GOTCHILD:
|
||||
if (!child->alive) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:waitpid_fired child %s was already dead",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(child->name)));
|
||||
goto MOVEON;
|
||||
}
|
||||
|
||||
@ -2147,7 +2147,7 @@ GOTCHILD:
|
||||
job, vpid, "abort", NULL );
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:waitpid_fired checking abort file %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), abort_file));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), abort_file));
|
||||
|
||||
free(job);
|
||||
free(vpid);
|
||||
@ -2158,8 +2158,8 @@ GOTCHILD:
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:waitpid_fired child %s died by abort",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(child->name)));
|
||||
|
||||
child->state = ORTE_PROC_STATE_ABORTED;
|
||||
free(abort_file);
|
||||
@ -2177,8 +2177,8 @@ GOTCHILD:
|
||||
"%s odls:waitpid_fired child process %s terminated normally "
|
||||
"but did not provide a required sync - it "
|
||||
"will be treated as an abnormal termination",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(child->name)));
|
||||
|
||||
goto MOVEON;
|
||||
} else {
|
||||
@ -2187,8 +2187,8 @@ GOTCHILD:
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:waitpid_fired child process %s terminated normally",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(child->name)));
|
||||
|
||||
}
|
||||
} else {
|
||||
@ -2209,8 +2209,8 @@ GOTCHILD:
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:waitpid_fired child process %s terminated with signal",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(child->name)));
|
||||
}
|
||||
|
||||
MOVEON:
|
||||
@ -2240,7 +2240,7 @@ void odls_base_default_wait_local_proc(pid_t pid, int status, void* cbdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:wait_local_proc child process %ld terminated",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(long)pid));
|
||||
|
||||
/* since we are going to be working with the global list of
|
||||
@ -2291,7 +2291,7 @@ void odls_base_default_wait_local_proc(pid_t pid, int status, void* cbdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:wait_local_proc did not find pid %ld in table!",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(long)pid));
|
||||
|
||||
/* it's just a race condition - don't error log it */
|
||||
@ -2318,8 +2318,8 @@ int orte_odls_base_default_kill_local_procs(orte_jobid_t job, bool set_state,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:kill_local_proc working on job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(job)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(job)));
|
||||
|
||||
/* since we are going to be working with the global list of
|
||||
* children, we need to protect that list from modification
|
||||
@ -2351,8 +2351,8 @@ int orte_odls_base_default_kill_local_procs(orte_jobid_t job, bool set_state,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:kill_local_proc checking child process %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(child->name)));
|
||||
|
||||
/* do we have a child from the specified job? Because the
|
||||
* job could be given as a WILDCARD value, we must use
|
||||
@ -2362,9 +2362,9 @@ int orte_odls_base_default_kill_local_procs(orte_jobid_t job, bool set_state,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:kill_local_proc child %s is not part of job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name),
|
||||
ORTE_JOBID_PRINT(job)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(child->name),
|
||||
orte_util_print_jobids(job)));
|
||||
|
||||
continue;
|
||||
}
|
||||
@ -2398,8 +2398,8 @@ int orte_odls_base_default_kill_local_procs(orte_jobid_t job, bool set_state,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:kill_local_proc child %s is not alive",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(child->name)));
|
||||
|
||||
/* ensure, though, that the state is terminated so we don't lockup if
|
||||
* the proc never started
|
||||
@ -2414,8 +2414,8 @@ int orte_odls_base_default_kill_local_procs(orte_jobid_t job, bool set_state,
|
||||
/* no need to error_log this - it just means that the pid is already gone */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:kill_local_proc child %s wait_cb_cancel failed",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(child->name)));
|
||||
|
||||
goto MOVEON;
|
||||
}
|
||||
@ -2462,8 +2462,8 @@ int orte_odls_base_default_kill_local_procs(orte_jobid_t job, bool set_state,
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:kill_local_proc child %s killed",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(child->name)));
|
||||
child->state = ORTE_PROC_STATE_ABORTED_BY_SIG; /* we may have sent it, but that's what happened */
|
||||
/* let this fall through to record the proc as "not alive" even
|
||||
* if child_died failed. We did our best, so as far as we are
|
||||
@ -2523,8 +2523,8 @@ int orte_odls_base_get_proc_stats(opal_buffer_t *answer,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||
"%s odls:get_proc_stats for proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
|
||||
/* find this child */
|
||||
for (item = opal_list_get_first(&orte_local_children);
|
||||
|
@ -88,7 +88,7 @@ int orte_odls_base_preload_files_app_context(orte_app_context_t* app_context)
|
||||
if(app_context->preload_binary) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_odls_globals.output,
|
||||
"%s) Preload Binary...",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
if( ORTE_SUCCESS != (ret = orte_odls_base_preload_append_binary(app_context,
|
||||
filem_request) ) ){
|
||||
orte_show_help("help-orte-odls-base.txt",
|
||||
@ -102,7 +102,7 @@ int orte_odls_base_preload_files_app_context(orte_app_context_t* app_context)
|
||||
if( NULL != app_context->preload_files) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_odls_globals.output,
|
||||
"%s) Preload Files... [%s]",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
app_context->preload_files));
|
||||
if( ORTE_SUCCESS != (ret = orte_odls_base_preload_append_files(app_context,
|
||||
filem_request) ) ){
|
||||
|
@ -438,7 +438,7 @@ int orte_odls_bproc_launch_local_procs(opal_buffer_t *data)
|
||||
if (ORTE_SUCCESS != (rc = orte_odls_base_default_construct_child_list(data, &jobid))) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
|
||||
"%s odls:bproc:launch:local failed to construct child list on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc)));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
@ -449,7 +449,7 @@ int orte_odls_bproc_launch_local_procs(opal_buffer_t *data)
|
||||
child = (orte_odls_child_t *) item;
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
|
||||
"%s odls:bproc:launch:local setting up io for %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(child->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), orte_util_print_name_args(child->name)));
|
||||
/* only setup to forward stdin if it is rank 0, otherwise connect
|
||||
* to /dev/null
|
||||
*/
|
||||
|
@ -345,7 +345,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
|
||||
"%s odls:default:fork got code %d back from child",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), i));
|
||||
close(p[0]);
|
||||
return ORTE_ERR_PIPE_READ_FAILURE;
|
||||
} else if (0 == rc) {
|
||||
@ -367,7 +367,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
|
||||
"%s odls:default:fork got code %d back from child",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), i));
|
||||
close(p[0]);
|
||||
return i;
|
||||
}
|
||||
@ -398,7 +398,7 @@ int orte_odls_default_launch_local_procs(opal_buffer_t *data)
|
||||
if (ORTE_SUCCESS != (rc = orte_odls_base_default_construct_child_list(data, &job))) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
|
||||
"%s odls:default:launch:local failed to construct child list on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc)));
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
@ -406,7 +406,7 @@ int orte_odls_default_launch_local_procs(opal_buffer_t *data)
|
||||
if (ORTE_SUCCESS != (rc = orte_odls_base_default_launch_local(job, odls_default_fork_local_proc))) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
|
||||
"%s odls:default:launch:local failed to launch on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc)));
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
@ -433,7 +433,7 @@ static int send_signal(pid_t pid, int signal)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_odls_globals.output,
|
||||
"%s sending signal %d to pid %ld",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
signal, (long)pid));
|
||||
|
||||
if (kill(pid, signal) != 0) {
|
||||
|
@ -189,7 +189,7 @@ static int odls_process_launch_local_procs(opal_buffer_t *data)
|
||||
if (ORTE_SUCCESS != (rc = orte_odls_base_default_construct_child_list(data, &job))) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
|
||||
"%s odls:process:launch:local failed to construct child list on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc)));
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
@ -197,7 +197,7 @@ static int odls_process_launch_local_procs(opal_buffer_t *data)
|
||||
if (ORTE_SUCCESS != (rc = orte_odls_base_default_launch_local(job, odls_process_fork_local_proc))) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
|
||||
"%s odls:process:launch:local failed to launch on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc)));
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
|
@ -425,7 +425,7 @@ mca_oob_tcp_create_connection(const int accepted_fd,
|
||||
/* log the accept */
|
||||
if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) {
|
||||
opal_output(0, "%s mca_oob_tcp_accept: %s:%d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
opal_net_get_hostname(addr),
|
||||
opal_net_get_port(addr));
|
||||
}
|
||||
@ -675,7 +675,7 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t
|
||||
|
||||
if (bind(*target_sd, (struct sockaddr*)&inaddr, addrlen) < 0) {
|
||||
opal_output(0, "%s bind() failed for port %d: %s (%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(int)ntohs(*target_port),
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno );
|
||||
@ -827,7 +827,7 @@ mca_oob_tcp_listen_thread(opal_object_t *obj)
|
||||
opal_output(0,
|
||||
"%s mca_oob_tcp_listen_thread: new connection: "
|
||||
"(%d, %d) %s:%d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
pending_connection->fd, opal_socket_errno,
|
||||
opal_net_get_hostname((struct sockaddr*) &pending_connection->addr),
|
||||
opal_net_get_port((struct sockaddr*) &pending_connection->addr));
|
||||
@ -902,7 +902,7 @@ mca_oob_tcp_accept_thread_handler(int sd, short flags, void* user)
|
||||
|
||||
if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_INFO) {
|
||||
opal_output(0, "%s in accept_thread_handler: %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), flags);
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), flags);
|
||||
}
|
||||
|
||||
OBJ_CONSTRUCT(&local_accepted_list, opal_list_t);
|
||||
@ -1023,8 +1023,8 @@ static void mca_oob_tcp_recv_probe(int sd, mca_oob_tcp_hdr_t* hdr)
|
||||
if(retval < 0) {
|
||||
if(opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_peer_recv_probe: send() failed: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(hdr->msg_src)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(hdr->msg_src)),
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
@ -1052,12 +1052,12 @@ static void mca_oob_tcp_recv_connect(int sd, mca_oob_tcp_hdr_t* hdr)
|
||||
/* now set socket up to be non-blocking */
|
||||
if((flags = fcntl(sd, F_GETFL, 0)) < 0) {
|
||||
opal_output(0, "%s mca_oob_tcp_recv_handler: fcntl(F_GETFL) failed: %s (%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno);
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno);
|
||||
} else {
|
||||
flags |= O_NONBLOCK;
|
||||
if(fcntl(sd, F_SETFL, flags) < 0) {
|
||||
opal_output(0, "%s mca_oob_tcp_recv_handler: fcntl(F_SETFL) failed: %s (%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno);
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1073,7 +1073,7 @@ static void mca_oob_tcp_recv_connect(int sd, mca_oob_tcp_hdr_t* hdr)
|
||||
peer = mca_oob_tcp_peer_lookup(&hdr->msg_src);
|
||||
if(NULL == peer) {
|
||||
opal_output(0, "%s mca_oob_tcp_recv_handler: unable to locate peer",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME));
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
return;
|
||||
}
|
||||
@ -1082,9 +1082,9 @@ static void mca_oob_tcp_recv_connect(int sd, mca_oob_tcp_hdr_t* hdr)
|
||||
if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT_FAIL) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_recv_handler: "
|
||||
"rejected connection from %s connection state %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
ORTE_NAME_PRINT(&(hdr->msg_src)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
orte_util_print_name_args(&(hdr->msg_src)),
|
||||
peer->peer_state);
|
||||
}
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
@ -1126,14 +1126,14 @@ static void mca_oob_tcp_recv_handler(int sd, short flags, void* user)
|
||||
if(rc >= 0) {
|
||||
if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT_FAIL) {
|
||||
opal_output(0, "%s mca_oob_tcp_recv_handler: peer closed connection",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME));
|
||||
}
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
return;
|
||||
}
|
||||
if(opal_socket_errno != EINTR) {
|
||||
opal_output(0, "%s mca_oob_tcp_recv_handler: recv() failed: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno);
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno);
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
return;
|
||||
}
|
||||
@ -1150,7 +1150,7 @@ static void mca_oob_tcp_recv_handler(int sd, short flags, void* user)
|
||||
break;
|
||||
default:
|
||||
opal_output(0, "%s mca_oob_tcp_recv_handler: invalid message type: %d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hdr.msg_type);
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), hdr.msg_type);
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
break;
|
||||
}
|
||||
@ -1197,14 +1197,14 @@ mca_oob_t* mca_oob_tcp_component_init(int* priority)
|
||||
strstr(mca_oob_tcp_component.tcp_include,name) == NULL) {
|
||||
OPAL_OUTPUT_VERBOSE((1, mca_oob_tcp_output_handle,
|
||||
"%s oob:tcp:init rejecting interface %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), name));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), name));
|
||||
continue;
|
||||
}
|
||||
if (mca_oob_tcp_component.tcp_exclude != NULL &&
|
||||
strstr(mca_oob_tcp_component.tcp_exclude,name) != NULL) {
|
||||
OPAL_OUTPUT_VERBOSE((1, mca_oob_tcp_output_handle,
|
||||
"%s oob:tcp:init rejecting interface %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), name));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), name));
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1213,7 +1213,7 @@ mca_oob_t* mca_oob_tcp_component_init(int* priority)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, mca_oob_tcp_output_handle,
|
||||
"%s oob:tcp:init setting up interface %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), name));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), name));
|
||||
|
||||
opal_ifindextoaddr(i, (struct sockaddr*) &dev->if_addr, sizeof(struct sockaddr_storage));
|
||||
if(opal_net_islocalhost((struct sockaddr*) &dev->if_addr)) {
|
||||
@ -1476,12 +1476,12 @@ int mca_oob_tcp_init(void)
|
||||
|
||||
if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_INFO) {
|
||||
opal_output(0, "%s accepting connections via listen thread",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME));
|
||||
}
|
||||
} else {
|
||||
if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_INFO) {
|
||||
opal_output(0, "%s accepting connections via event library",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1849,8 +1849,8 @@ mca_oob_tcp_get_new_name(orte_process_name_t* name)
|
||||
|
||||
if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_ALL) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_get_new_name: starting\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)));
|
||||
}
|
||||
|
||||
/* turn the size to network byte order so there will be no problems */
|
||||
@ -1875,7 +1875,7 @@ mca_oob_tcp_get_new_name(orte_process_name_t* name)
|
||||
*name = *ORTE_PROC_MY_NAME;
|
||||
if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_ALL) {
|
||||
opal_output(0, "%s mca_oob_tcp_get_new_name: done\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -257,8 +257,8 @@ bool mca_oob_tcp_msg_send_handler(mca_oob_tcp_msg_t* msg, struct mca_oob_tcp_pee
|
||||
return false;
|
||||
else {
|
||||
opal_output(0, "%s->%s mca_oob_tcp_msg_send_handler: writev failed: %s (%d) [sd = %d]",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno,
|
||||
peer->peer_sd);
|
||||
@ -306,8 +306,8 @@ bool mca_oob_tcp_msg_recv_handler(mca_oob_tcp_msg_t* msg, struct mca_oob_tcp_pee
|
||||
msg->msg_rwbuf = malloc(msg->msg_hdr.msg_size);
|
||||
if(NULL == msg->msg_rwbuf) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_msg_recv_handler: malloc(%d) failed\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
msg->msg_hdr.msg_size);
|
||||
mca_oob_tcp_peer_close(peer);
|
||||
return false;
|
||||
@ -322,9 +322,9 @@ bool mca_oob_tcp_msg_recv_handler(mca_oob_tcp_msg_t* msg, struct mca_oob_tcp_pee
|
||||
}
|
||||
if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_INFO) {
|
||||
opal_output(0, "%s-%s (origin: %s) mca_oob_tcp_msg_recv_handler: size %lu\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
ORTE_NAME_PRINT(&(msg->msg_hdr.msg_origin)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
orte_util_print_name_args(&(msg->msg_hdr.msg_origin)),
|
||||
(unsigned long)(msg->msg_hdr.msg_size) );
|
||||
}
|
||||
}
|
||||
@ -364,8 +364,8 @@ static bool mca_oob_tcp_msg_recv(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pee
|
||||
else if (opal_socket_errno == EAGAIN || opal_socket_errno == EWOULDBLOCK)
|
||||
return false;
|
||||
opal_output(0, "%s-%s mca_oob_tcp_msg_recv: readv failed: %s (%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
mca_oob_tcp_peer_close(peer);
|
||||
@ -376,8 +376,8 @@ static bool mca_oob_tcp_msg_recv(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pee
|
||||
} else if (rc == 0) {
|
||||
if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT_FAIL) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_msg_recv: peer closed connection",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)));
|
||||
}
|
||||
mca_oob_tcp_peer_close(peer);
|
||||
if (NULL != mca_oob_tcp.oob_exception_callback) {
|
||||
@ -423,8 +423,8 @@ void mca_oob_tcp_msg_recv_complete(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* p
|
||||
break;
|
||||
default:
|
||||
opal_output(0, "%s mca_oob_tcp_msg_recv_complete: invalid message type: %d from peer %s\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg->msg_hdr.msg_type,
|
||||
ORTE_NAME_PRINT(&peer->peer_name));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), msg->msg_hdr.msg_type,
|
||||
orte_util_print_name_args(&peer->peer_name));
|
||||
MCA_OOB_TCP_MSG_RETURN(msg);
|
||||
break;
|
||||
}
|
||||
|
@ -302,8 +302,8 @@ mca_oob_tcp_peer_create_socket(mca_oob_tcp_peer_t* peer,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, mca_oob_tcp_output_handle,
|
||||
"%s oob:tcp:peer creating socket to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name))));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name))));
|
||||
|
||||
peer->peer_sd = socket(af_family, SOCK_STREAM, 0);
|
||||
peer->peer_current_af = af_family;
|
||||
@ -311,8 +311,8 @@ mca_oob_tcp_peer_create_socket(mca_oob_tcp_peer_t* peer,
|
||||
if (peer->peer_sd < 0) {
|
||||
opal_output(0,
|
||||
"%s-%s mca_oob_tcp_peer_create_socket: socket() failed: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
mca_oob_tcp_peer_shutdown(peer);
|
||||
@ -329,16 +329,16 @@ mca_oob_tcp_peer_create_socket(mca_oob_tcp_peer_t* peer,
|
||||
if (peer->peer_sd >= 0) {
|
||||
if((flags = fcntl(peer->peer_sd, F_GETFL, 0)) < 0) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_peer_connect: fcntl(F_GETFL) failed: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
} else {
|
||||
flags |= O_NONBLOCK;
|
||||
if(fcntl(peer->peer_sd, F_SETFL, flags) < 0)
|
||||
opal_output(0, "%s-%s mca_oob_tcp_peer_connect: fcntl(F_SETFL) failed: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
}
|
||||
@ -362,8 +362,8 @@ static int mca_oob_tcp_peer_try_connect(mca_oob_tcp_peer_t* peer)
|
||||
if(ORTE_SUCCESS != (rc = mca_oob_tcp_addr_get_next(peer->peer_addr, (struct sockaddr*) &inaddr))) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_peer_try_connect: "
|
||||
"mca_oob_tcp_addr_get_next failed with error=%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
rc);
|
||||
mca_oob_tcp_peer_close(peer);
|
||||
return ORTE_ERR_UNREACH;
|
||||
@ -384,8 +384,8 @@ static int mca_oob_tcp_peer_try_connect(mca_oob_tcp_peer_t* peer)
|
||||
if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_peer_try_connect: "
|
||||
"connecting port %d to: %s:%d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
/* Bug, FIXME: output tcp6_listen_port for AF_INET6 */
|
||||
ntohs(mca_oob_tcp_component.tcp_listen_port),
|
||||
opal_net_get_hostname((struct sockaddr*) &inaddr),
|
||||
@ -428,8 +428,8 @@ static int mca_oob_tcp_peer_try_connect(mca_oob_tcp_peer_t* peer)
|
||||
ECONNREFUSED != opal_socket_errno)) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_peer_try_connect: "
|
||||
"connect to %s:%d failed: %s (%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
opal_net_get_hostname((struct sockaddr*) &inaddr),
|
||||
opal_net_get_port((struct sockaddr*) &inaddr),
|
||||
strerror(opal_socket_errno),
|
||||
@ -447,8 +447,8 @@ static int mca_oob_tcp_peer_try_connect(mca_oob_tcp_peer_t* peer)
|
||||
opal_output(0,
|
||||
"%s-%s mca_oob_tcp_peer_try_connect: "
|
||||
"mca_oob_tcp_peer_send_connect_ack to %s:%d failed: %s (%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
opal_net_get_hostname((struct sockaddr*) &inaddr),
|
||||
opal_net_get_port((struct sockaddr*) &inaddr),
|
||||
opal_strerror(rc),
|
||||
@ -461,8 +461,8 @@ static int mca_oob_tcp_peer_try_connect(mca_oob_tcp_peer_t* peer)
|
||||
if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_peer_try_connect: "
|
||||
"Connection across all interfaces failed. Likely will retry",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)));
|
||||
}
|
||||
mca_oob_tcp_peer_close(peer);
|
||||
return ORTE_ERR_UNREACH;
|
||||
@ -507,8 +507,8 @@ static void mca_oob_tcp_peer_complete_connect(mca_oob_tcp_peer_t* peer, int sd)
|
||||
/* check connect completion status */
|
||||
if(getsockopt(sd, SOL_SOCKET, SO_ERROR, (char *)&so_error, &so_length) < 0) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_peer_complete_connect: getsockopt() failed: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
mca_oob_tcp_peer_close(peer);
|
||||
@ -523,8 +523,8 @@ static void mca_oob_tcp_peer_complete_connect(mca_oob_tcp_peer_t* peer, int sd)
|
||||
if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_peer_complete_connect: "
|
||||
"connection failed: %s (%d) - retrying\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
strerror(so_error),
|
||||
so_error);
|
||||
}
|
||||
@ -542,8 +542,8 @@ static void mca_oob_tcp_peer_complete_connect(mca_oob_tcp_peer_t* peer, int sd)
|
||||
if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_peer_complete_connect: "
|
||||
"sending ack, %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)), so_error);
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)), so_error);
|
||||
}
|
||||
|
||||
if (mca_oob_tcp_peer_send_connect_ack(peer, sd) == ORTE_SUCCESS) {
|
||||
@ -551,8 +551,8 @@ static void mca_oob_tcp_peer_complete_connect(mca_oob_tcp_peer_t* peer, int sd)
|
||||
opal_event_add(&peer->peer_recv_event, 0);
|
||||
} else {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_peer_complete_connect: unable to send connect ack.",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)));
|
||||
mca_oob_tcp_peer_close(peer);
|
||||
}
|
||||
}
|
||||
@ -585,8 +585,8 @@ void mca_oob_tcp_peer_close(mca_oob_tcp_peer_t* peer)
|
||||
{
|
||||
if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_peer_close(%p) sd %d state %d\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
(void*)peer,
|
||||
peer->peer_sd,
|
||||
peer->peer_state);
|
||||
@ -614,8 +614,8 @@ void mca_oob_tcp_peer_shutdown(mca_oob_tcp_peer_t* peer)
|
||||
mca_oob_tcp_msg_t *msg;
|
||||
|
||||
opal_output(0, "%s-%s oob-tcp: Communication retries exceeded. Can not communicate with peer",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)));
|
||||
|
||||
/* There are cases during the initial connection setup where
|
||||
the peer_send_msg is NULL but there are things in the queue
|
||||
@ -692,8 +692,8 @@ static int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* peer, int sd)
|
||||
opal_output(0,
|
||||
"%s-%s mca_oob_tcp_peer_recv_connect_ack "
|
||||
"connect failed during receive. Restarting (%s).",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
strerror(opal_socket_errno));
|
||||
}
|
||||
opal_event_del(&peer->peer_recv_event);
|
||||
@ -717,9 +717,9 @@ static int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* peer, int sd)
|
||||
if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &peer->peer_name, &hdr.msg_src)) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_peer_recv_connect_ack: "
|
||||
"received unexpected process identifier %s\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
ORTE_NAME_PRINT(&(hdr.msg_src)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
orte_util_print_name_args(&(hdr.msg_src)));
|
||||
mca_oob_tcp_peer_close(peer);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
@ -749,8 +749,8 @@ static int mca_oob_tcp_peer_recv_blocking(mca_oob_tcp_peer_t* peer, int sd, void
|
||||
if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_INFO) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_peer_recv_blocking: "
|
||||
"peer closed connection: peer state %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
peer->peer_state);
|
||||
}
|
||||
mca_oob_tcp_peer_close(peer);
|
||||
@ -782,8 +782,8 @@ static int mca_oob_tcp_peer_recv_blocking(mca_oob_tcp_peer_t* peer, int sd, void
|
||||
opal_output(0,
|
||||
"%s-%s mca_oob_tcp_peer_recv_blocking: "
|
||||
"recv() failed: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
strerror(errno),
|
||||
errno);
|
||||
mca_oob_tcp_peer_close(peer);
|
||||
@ -810,8 +810,8 @@ static int mca_oob_tcp_peer_send_blocking(mca_oob_tcp_peer_t* peer, int sd, void
|
||||
if(retval < 0) {
|
||||
if(opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_peer_send_blocking: send() failed: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
mca_oob_tcp_peer_close(peer);
|
||||
@ -878,8 +878,8 @@ static void mca_oob_tcp_peer_recv_handler(int sd, short flags, void* user)
|
||||
MCA_OOB_TCP_MSG_ALLOC(msg, rc);
|
||||
if(NULL == msg) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: unable to allocate recv message\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)));
|
||||
return;
|
||||
}
|
||||
msg->msg_type = MCA_OOB_TCP_UNEXPECTED;
|
||||
@ -908,8 +908,8 @@ static void mca_oob_tcp_peer_recv_handler(int sd, short flags, void* user)
|
||||
default:
|
||||
{
|
||||
opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: invalid socket state(%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
peer->peer_state);
|
||||
mca_oob_tcp_peer_close(peer);
|
||||
break;
|
||||
@ -956,8 +956,8 @@ static void mca_oob_tcp_peer_send_handler(int sd, short flags, void* user)
|
||||
}
|
||||
default:
|
||||
opal_output(0, "%s-%s mca_oob_tcp_peer_send_handler: invalid connection state (%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
peer->peer_state);
|
||||
opal_event_del(&peer->peer_send_event);
|
||||
break;
|
||||
@ -1022,8 +1022,8 @@ void mca_oob_tcp_peer_dump(mca_oob_tcp_peer_t* peer, const char* msg)
|
||||
#endif
|
||||
|
||||
snprintf(buff, sizeof(buff), "%s-%s %s: %s - %s nodelay %d sndbuf %d rcvbuf %d flags %08x\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
msg, src, dst, nodelay, sndbuf, rcvbuf, flags);
|
||||
opal_output(0, buff);
|
||||
}
|
||||
@ -1058,8 +1058,8 @@ bool mca_oob_tcp_peer_accept(mca_oob_tcp_peer_t* peer, int sd)
|
||||
if(mca_oob_tcp_peer_send_connect_ack(peer, sd) != ORTE_SUCCESS) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_peer_accept: "
|
||||
"mca_oob_tcp_peer_send_connect_ack failed\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)));
|
||||
mca_oob_tcp_peer_close(peer);
|
||||
OPAL_THREAD_UNLOCK(&peer->peer_lock);
|
||||
return false;
|
||||
|
@ -91,8 +91,8 @@ mca_oob_tcp_ping(const orte_process_name_t* name,
|
||||
if(ORTE_SUCCESS != (rc = mca_oob_tcp_parse_uri(uri, (struct sockaddr*) &inaddr))) {
|
||||
opal_output(0,
|
||||
"%s-%s mca_oob_tcp_ping: invalid uri: %s\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(name),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(name),
|
||||
uri);
|
||||
return rc;
|
||||
}
|
||||
@ -102,8 +102,8 @@ mca_oob_tcp_ping(const orte_process_name_t* name,
|
||||
if (sd < 0) {
|
||||
opal_output(0,
|
||||
"%s-%s mca_oob_tcp_ping: socket() failed: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(name),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(name),
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
return ORTE_ERR_UNREACH;
|
||||
@ -112,16 +112,16 @@ mca_oob_tcp_ping(const orte_process_name_t* name,
|
||||
/* setup the socket as non-blocking */
|
||||
if((flags = fcntl(sd, F_GETFL, 0)) < 0) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_ping: fcntl(F_GETFL) failed: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(name),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(name),
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
} else {
|
||||
flags |= O_NONBLOCK;
|
||||
if(fcntl(sd, F_SETFL, flags) < 0) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_ping: fcntl(F_SETFL) failed: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(name),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(name),
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
}
|
||||
@ -144,8 +144,8 @@ mca_oob_tcp_ping(const orte_process_name_t* name,
|
||||
/* connect failed? */
|
||||
if(opal_socket_errno != EINPROGRESS && opal_socket_errno != EWOULDBLOCK) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_ping: connect failed: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(name),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(name),
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
@ -166,8 +166,8 @@ mca_oob_tcp_ping(const orte_process_name_t* name,
|
||||
flags &= ~O_NONBLOCK;
|
||||
if(fcntl(sd, F_SETFL, flags) < 0) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_ping: fcntl(F_SETFL) failed: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(name),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(name),
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
}
|
||||
|
@ -120,8 +120,8 @@ int mca_oob_tcp_send_nb(
|
||||
|
||||
if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_ALL) {
|
||||
opal_output(0, "%s-%s mca_oob_tcp_send_nb: tag %d size %lu\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->peer_name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(peer->peer_name)),
|
||||
tag, (unsigned long)size );
|
||||
}
|
||||
|
||||
|
@ -172,8 +172,8 @@ static int plm_alps_launch_job(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:alps: launching job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid)));
|
||||
|
||||
/* setup the job */
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_setup_job(jdata))) {
|
||||
@ -197,7 +197,7 @@ static int plm_alps_launch_job(orte_job_t *jdata)
|
||||
/* have all the daemons we need - launch app */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:alps: no new daemons to launch",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
goto launch_apps;
|
||||
}
|
||||
|
||||
|
@ -86,7 +86,7 @@ static void check_heartbeat(int fd, short dummy, void *arg)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:base:check_heartbeat",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* if we are aborting or shutting down, ignore this */
|
||||
if (orte_abnormal_term_ordered || 0 == orte_heartbeat_rate) {
|
||||
|
@ -70,8 +70,8 @@ int orte_plm_base_setup_job(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:setup_job for job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid)));
|
||||
|
||||
/* insert the job object into the global pool */
|
||||
ljob = ORTE_LOCAL_JOBID(jdata->jobid);
|
||||
@ -112,9 +112,9 @@ int orte_plm_base_setup_job(orte_job_t *jdata)
|
||||
nd = (orte_nid_t**)orte_nidmap.addr;
|
||||
for (i=0; i < orte_nidmap.size && NULL != nd[i]; i++) {
|
||||
fprintf(stderr, "%s node[%d].name %s daemon %s arch %0x\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i,
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), i,
|
||||
(NULL == nd[i]) ? "NULL" : nd[i]->name,
|
||||
ORTE_VPID_PRINT(nd[i]->daemon),
|
||||
orte_util_print_vpids(nd[i]->daemon),
|
||||
(NULL == nd[i]) ? 0 : nd[i]->arch);
|
||||
for (item = opal_list_get_first(&nd[i]->attrs);
|
||||
item != opal_list_get_end(&nd[i]->attrs);
|
||||
@ -140,8 +140,8 @@ int orte_plm_base_setup_job(orte_job_t *jdata)
|
||||
jdata->num_procs <= jdata->stdin_target) {
|
||||
/* this request cannot be met */
|
||||
orte_show_help("help-plm-base.txt", "stdin-target-out-of-range", true,
|
||||
ORTE_VPID_PRINT(jdata->stdin_target),
|
||||
ORTE_VPID_PRINT(jdata->num_procs));
|
||||
orte_util_print_vpids(jdata->stdin_target),
|
||||
orte_util_print_vpids(jdata->num_procs));
|
||||
orte_finalize();
|
||||
exit(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
}
|
||||
@ -201,8 +201,8 @@ int orte_plm_base_launch_apps(orte_jobid_t job)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:launch_apps for job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(job)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(job)));
|
||||
|
||||
if (orte_timing) {
|
||||
gettimeofday(&app_launch_start, NULL);
|
||||
@ -250,8 +250,8 @@ int orte_plm_base_launch_apps(orte_jobid_t job)
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_report_launched(job))) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:launch failed for job %s on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(job), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(job), ORTE_ERROR_NAME(rc)));
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -268,7 +268,7 @@ int orte_plm_base_launch_apps(orte_jobid_t job)
|
||||
/* complete wiring up the iof */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:launch wiring up iof",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* push stdin - the IOF will know what to do with the specified target */
|
||||
name.jobid = job;
|
||||
@ -281,8 +281,8 @@ int orte_plm_base_launch_apps(orte_jobid_t job)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:launch completed for job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(job)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(job)));
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -303,15 +303,15 @@ void orte_plm_base_launch_failed(orte_jobid_t job, pid_t pid,
|
||||
if (!opal_atomic_trylock(&orte_abort_inprogress_lock)) { /* returns 1 if already locked */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:base:launch_failed abort in progress, ignoring report",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:launch_failed for job %s, status %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(job), status));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(job), status));
|
||||
|
||||
/* no matter what, we must exit with a non-zero status */
|
||||
if (0 == status) {
|
||||
@ -404,8 +404,8 @@ static void process_orted_launch_report(int fd, short event, void *data)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:orted_report_launch from daemon %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&mev->sender)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&mev->sender)));
|
||||
|
||||
/* see if we need to timestamp this receipt */
|
||||
if (orte_timing) {
|
||||
@ -532,9 +532,9 @@ CLEANUP:
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:orted_report_launch %s for daemon %s at contact %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orted_failed_launch ? "failed" : "completed",
|
||||
ORTE_NAME_PRINT(&mev->sender), pdatorted[mev->sender.vpid]->rml_uri));
|
||||
orte_util_print_name_args(&mev->sender), pdatorted[mev->sender.vpid]->rml_uri));
|
||||
|
||||
/* release the message */
|
||||
OBJ_RELEASE(mev);
|
||||
@ -580,7 +580,7 @@ int orte_plm_base_daemon_callback(orte_std_cntr_t num_daemons)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:daemon_callback",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
orted_num_callback = 0;
|
||||
orted_failed_launch = false;
|
||||
@ -622,7 +622,7 @@ int orte_plm_base_daemon_callback(orte_std_cntr_t num_daemons)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:daemon_callback completed",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* if a tree-launch was underway, clear out the cmd */
|
||||
if (NULL != orte_tree_launch_cmd) {
|
||||
@ -659,8 +659,8 @@ void orte_plm_base_app_report_launch(int fd, short event, void *data)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:app_report_launch from daemon %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&mev->sender)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&mev->sender)));
|
||||
|
||||
/* unpack the jobid being reported */
|
||||
cnt = 1;
|
||||
@ -721,7 +721,7 @@ void orte_plm_base_app_report_launch(int fd, short event, void *data)
|
||||
ORTE_COMPUTE_TIME_DIFF(sec, usec, launch_msg_sent.tv_sec, launch_msg_sent.tv_usec,
|
||||
tmpsec, tmpusec);
|
||||
fprintf(orte_timing_output, "Time for launch msg to reach daemon %s: %s\n",
|
||||
ORTE_VPID_PRINT(mev->sender.vpid), pretty_print_timing(sec, usec));
|
||||
orte_util_print_vpids(mev->sender.vpid), pretty_print_timing(sec, usec));
|
||||
}
|
||||
}
|
||||
|
||||
@ -765,7 +765,7 @@ void orte_plm_base_app_report_launch(int fd, short event, void *data)
|
||||
/* remove the newline and the year at the end */
|
||||
tmpstr[strlen(tmpstr)-6] = '\0';
|
||||
fprintf(orte_timing_output, "Time rank %s was launched: %s.%3lu\n",
|
||||
ORTE_VPID_PRINT(vpid), tmpstr, (unsigned long)(tmpusec/1000));
|
||||
orte_util_print_vpids(vpid), tmpstr, (unsigned long)(tmpusec/1000));
|
||||
}
|
||||
}
|
||||
/* unpack the state */
|
||||
@ -785,9 +785,9 @@ void orte_plm_base_app_report_launch(int fd, short event, void *data)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:app_report_launched for proc %s from daemon %s: pid %lu state %0x exit %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(procs[vpid]->name)),
|
||||
ORTE_NAME_PRINT(&mev->sender), (unsigned long)pid,
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&(procs[vpid]->name)),
|
||||
orte_util_print_name_args(&mev->sender), (unsigned long)pid,
|
||||
(int)state, (int)exit_code));
|
||||
|
||||
/* lookup the proc and update values */
|
||||
@ -797,9 +797,9 @@ void orte_plm_base_app_report_launch(int fd, short event, void *data)
|
||||
if (ORTE_PROC_STATE_FAILED_TO_START == state) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:app_report_launched daemon %s reports proc %s failed to start",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&mev->sender),
|
||||
ORTE_NAME_PRINT(&(procs[vpid]->name))));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&mev->sender),
|
||||
orte_util_print_name_args(&(procs[vpid]->name))));
|
||||
if (NULL == jdata->aborted_proc) {
|
||||
jdata->aborted_proc = procs[vpid]; /* only store this once */
|
||||
jdata->state = ORTE_JOB_STATE_FAILED_TO_START; /* update the job state */
|
||||
@ -821,7 +821,7 @@ void orte_plm_base_app_report_launch(int fd, short event, void *data)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:app_report_launch completed processing",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
CLEANUP:
|
||||
if (app_launch_failed) {
|
||||
@ -856,7 +856,7 @@ static void app_report_launch(int status, orte_process_name_t* sender,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:app_report_launch reissuing non-blocking recv",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* reissue the non-blocking receive */
|
||||
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_APP_LAUNCH_CALLBACK,
|
||||
@ -875,8 +875,8 @@ static int orte_plm_base_report_launched(orte_jobid_t job)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:report_launched for job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(job)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(job)));
|
||||
|
||||
/* get the job data object */
|
||||
if (NULL == (jdata = orte_get_job_data_object(job))) {
|
||||
@ -906,7 +906,7 @@ static int orte_plm_base_report_launched(orte_jobid_t job)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:report_launched all apps reported",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* declare the job to be launched, but check to ensure
|
||||
* the procs haven't already reported in to avoid setting the
|
||||
@ -1151,7 +1151,7 @@ void orte_plm_base_check_job_completed(orte_job_t *jdata)
|
||||
if (NULL == jdata) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:check_job_completed called with NULL pointer",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
goto CHECK_ALL_JOBS;
|
||||
}
|
||||
|
||||
@ -1162,8 +1162,8 @@ void orte_plm_base_check_job_completed(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:check_job_completed for job %s - num_terminated %lu num_procs %lu",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid),
|
||||
(unsigned long)jdata->num_terminated,
|
||||
(unsigned long)jdata->num_procs));
|
||||
|
||||
@ -1234,9 +1234,9 @@ void orte_plm_base_check_job_completed(orte_job_t *jdata)
|
||||
if (ORTE_JOB_STATE_FAILED_TO_START == jdata->state) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:check_job_completed declared job %s failed to start by proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid),
|
||||
(NULL == jdata->aborted_proc) ? "unknown" : ORTE_NAME_PRINT(&(jdata->aborted_proc->name))));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid),
|
||||
(NULL == jdata->aborted_proc) ? "unknown" : orte_util_print_name_args(&(jdata->aborted_proc->name))));
|
||||
/* report this to the errmgr - it will protect us from multiple calls */
|
||||
if (NULL == jdata->aborted_proc) {
|
||||
/* we don't know who caused us to abort */
|
||||
@ -1250,9 +1250,9 @@ void orte_plm_base_check_job_completed(orte_job_t *jdata)
|
||||
ORTE_JOB_STATE_ABORTED_WO_SYNC == jdata->state) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:check_job_completed declared job %s aborted by proc %s with code %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid),
|
||||
(NULL == jdata->aborted_proc) ? "unknown" : ORTE_NAME_PRINT(&(jdata->aborted_proc->name)),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid),
|
||||
(NULL == jdata->aborted_proc) ? "unknown" : orte_util_print_name_args(&(jdata->aborted_proc->name)),
|
||||
(NULL == jdata->aborted_proc) ? ORTE_ERROR_DEFAULT_EXIT_CODE : jdata->aborted_proc->exit_code));
|
||||
/* report this to the errmgr */
|
||||
if (NULL == jdata->aborted_proc) {
|
||||
@ -1269,8 +1269,8 @@ void orte_plm_base_check_job_completed(orte_job_t *jdata)
|
||||
jdata->state = ORTE_JOB_STATE_TERMINATED;
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:check_job_completed declared job %s normally terminated - checking all jobs",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid)));
|
||||
|
||||
CHECK_ALL_JOBS:
|
||||
/* if the job that is being checked is the HNP, then we are
|
||||
@ -1366,8 +1366,8 @@ CHECK_ALL_JOBS:
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:check_job_completed job %s is not terminated",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jobs[j]->jobid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jobs[j]->jobid)));
|
||||
one_still_alive = true;
|
||||
}
|
||||
}
|
||||
@ -1375,13 +1375,13 @@ CHECK_ALL_JOBS:
|
||||
if (one_still_alive) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:check_job_completed at least one job is not terminated",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
return;
|
||||
}
|
||||
/* if we get here, then all jobs are done, so wakeup */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:check_job_completed all jobs terminated - waking up",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
/* set the exit status to 0 - this will only happen if it
|
||||
* wasn't already set by an error condition
|
||||
*/
|
||||
|
@ -53,7 +53,7 @@ static void failed_send(int fd, short event, void *arg)
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:orted_cmd command messages timed out with num_sent %ld",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)num_reported));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), (long)num_reported));
|
||||
done_reporting = true;
|
||||
}
|
||||
|
||||
@ -65,8 +65,8 @@ static void send_callback(int status,
|
||||
{
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:orted_cmd message to %s sent",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(peer)));
|
||||
|
||||
num_reported++;
|
||||
if (num_reported == num_being_sent) {
|
||||
@ -82,7 +82,7 @@ static void send_callback(int status,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:orted_cmd all messages sent",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
}
|
||||
}
|
||||
@ -96,7 +96,7 @@ int orte_plm_base_orted_exit(orte_daemon_cmd_flag_t command)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:orted_cmd sending orted_exit commands",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* stop all heartbeats */
|
||||
orte_heartbeat_rate = 0;
|
||||
@ -136,7 +136,7 @@ int orte_plm_base_orted_exit(orte_daemon_cmd_flag_t command)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:orted_cmd:orted_exit abnormal term ordered",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* turn off message routing - no way to guarantee that
|
||||
* the route still exists
|
||||
@ -172,8 +172,8 @@ int orte_plm_base_orted_exit(orte_daemon_cmd_flag_t command)
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:orted_cmd:orted_exit sending cmd to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&peer)));
|
||||
orte_rml.send_buffer_nb(&peer, &cmd, ORTE_RML_TAG_DAEMON, 0,
|
||||
send_callback, 0);
|
||||
}
|
||||
@ -229,7 +229,7 @@ int orte_plm_base_orted_kill_local_procs(orte_jobid_t job)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:orted_cmd sending kill_local_procs cmds",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
OBJ_CONSTRUCT(&cmd, opal_buffer_t);
|
||||
|
||||
@ -261,7 +261,7 @@ int orte_plm_base_orted_kill_local_procs(orte_jobid_t job)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:orted_cmd:kill_local_procs abnormal term ordered",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* get the job object for the daemons */
|
||||
if (NULL == (daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
|
||||
@ -311,8 +311,8 @@ int orte_plm_base_orted_kill_local_procs(orte_jobid_t job)
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:orted_cmd:kill_local_procs sending cmd to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&peer)));
|
||||
orte_rml.send_buffer_nb(&peer, &cmd, ORTE_RML_TAG_DAEMON, 0,
|
||||
send_callback, 0);
|
||||
}
|
||||
@ -366,7 +366,7 @@ int orte_plm_base_orted_signal_local_procs(orte_jobid_t job, int32_t signal)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:orted_cmd sending signal_local_procs cmds",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
OBJ_CONSTRUCT(&cmd, opal_buffer_t);
|
||||
|
||||
|
@ -46,7 +46,7 @@ int orte_plm_proxy_spawn(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:proxy spawn child job",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* setup the buffer */
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
@ -74,8 +74,8 @@ int orte_plm_proxy_spawn(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:proxy sending spawn cmd to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(target)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(target)));
|
||||
|
||||
/* tell the target to launch the job */
|
||||
if (0 > (rc = orte_rml.send_buffer(target, &buf, ORTE_RML_TAG_PLM, 0))) {
|
||||
@ -87,7 +87,7 @@ int orte_plm_proxy_spawn(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:proxy waiting for response",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* wait for the target's response */
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
|
@ -64,7 +64,7 @@ int orte_plm_base_comm_start(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:receive start comm",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
|
||||
ORTE_RML_TAG_PLM,
|
||||
@ -87,7 +87,7 @@ int orte_plm_base_comm_stop(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:receive stop comm",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_PLM);
|
||||
recv_issued = false;
|
||||
@ -127,7 +127,7 @@ void orte_plm_base_receive_process_msg(int fd, short event, void *data)
|
||||
case ORTE_PLM_LAUNCH_JOB_CMD:
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:receive job launch command",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* unpack the job object */
|
||||
count = 1;
|
||||
@ -201,8 +201,8 @@ void orte_plm_base_receive_process_msg(int fd, short event, void *data)
|
||||
ANSWER_LAUNCH:
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:receive job %s launched",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(job)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(job)));
|
||||
|
||||
/* pack the jobid to be returned */
|
||||
if (ORTE_SUCCESS != (ret = opal_dss.pack(&answer, &job, 1, ORTE_JOBID))) {
|
||||
@ -222,8 +222,8 @@ void orte_plm_base_receive_process_msg(int fd, short event, void *data)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:receive got update_proc_state for job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(job)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(job)));
|
||||
|
||||
/* lookup the job object */
|
||||
if (NULL == (jdata = orte_get_job_data_object(job))) {
|
||||
@ -252,7 +252,7 @@ void orte_plm_base_receive_process_msg(int fd, short event, void *data)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:receive got update_proc_state for vpid %lu state %x exit_code %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(unsigned long)vpid, (unsigned int)state, (int)exit_code));
|
||||
|
||||
/* update the termination counter IFF the state is changing to something
|
||||
@ -290,8 +290,8 @@ void orte_plm_base_receive_process_msg(int fd, short event, void *data)
|
||||
case ORTE_PLM_HEARTBEAT_CMD:
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:receive got heartbeat from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&mev->sender)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&mev->sender)));
|
||||
/* lookup the daemon object */
|
||||
if (NULL == (jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
@ -331,8 +331,8 @@ void orte_plm_base_recv(int status, orte_process_name_t* sender,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:receive got message from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(sender)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(sender)));
|
||||
|
||||
/* don't process this right away - we need to get out of the recv before
|
||||
* we process the message as it may ask us to do something that involves
|
||||
|
@ -510,7 +510,7 @@ int orte_plm_base_local_slave_launch(orte_job_t *jdata)
|
||||
if (0 < opal_output_get_verbosity(orte_plm_globals.output)) {
|
||||
param = opal_argv_join(argv, ' ');
|
||||
opal_output(0, "%s plm:rsh: final bootproxy cmd:\n\t%s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(NULL == param) ? "NULL" : param);
|
||||
if (NULL != param) free(param);
|
||||
}
|
||||
|
@ -177,8 +177,8 @@ static int plm_ccp_launch_job(orte_job_t *jdata)
|
||||
if (ORTE_JOBID_INVALID != jdata->jobid) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:ccp: launching job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid)));
|
||||
goto GETMAP;
|
||||
}
|
||||
|
||||
@ -190,8 +190,8 @@ static int plm_ccp_launch_job(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:ccp: launching job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid)));
|
||||
|
||||
/* setup the job */
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_setup_job(jdata))) {
|
||||
@ -231,7 +231,7 @@ GETMAP:
|
||||
param = opal_argv_join(argv, ' ');
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:ccp: final top-level argv:\n\t%s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(NULL == param) ? "NULL" : param));
|
||||
if (NULL != param) free(param);
|
||||
}
|
||||
@ -297,7 +297,7 @@ GETMAP:
|
||||
apps[0]->prefix_dir, bin_base, env[i] + 5);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:ccp: resetting PATH: %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
newenv));
|
||||
opal_setenv("PATH", newenv, true, &env);
|
||||
free(newenv);
|
||||
@ -309,7 +309,7 @@ GETMAP:
|
||||
apps[0]->prefix_dir, lib_base, env[i] + 16);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:ccp: resetting LD_LIBRARY_PATH: %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
newenv));
|
||||
opal_setenv("LD_LIBRARY_PATH", newenv, true, &env);
|
||||
free(newenv);
|
||||
@ -388,7 +388,7 @@ GETMAP:
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:ccp: launching on node %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
node->name));
|
||||
|
||||
/* setup process name */
|
||||
@ -407,7 +407,7 @@ GETMAP:
|
||||
param = opal_argv_join(argv, ' ');
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:ccp: executing:\n\t%s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(NULL == param) ? "NULL" : param));
|
||||
if (NULL != param) free(param);
|
||||
}
|
||||
@ -502,14 +502,14 @@ GETMAP:
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:ccp:launch: finished spawning orteds",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* wait for daemons to callback */
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_daemon_callback(map->num_new_daemons))) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:ccp: daemon launch failed for job %s on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid), ORTE_ERROR_NAME(rc)));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
@ -519,8 +519,8 @@ launch_apps:
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_launch_apps(jdata->jobid))) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:ccp: launch of apps failed for job %s on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid), ORTE_ERROR_NAME(rc)));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
@ -590,7 +590,7 @@ launch_apps:
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:ccp:launch: finished",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
@ -163,8 +163,8 @@ static int plm_lsf_launch_job(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:lsf: launching job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid)));
|
||||
|
||||
/* setup the job */
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_setup_job(jdata))) {
|
||||
@ -189,7 +189,7 @@ static int plm_lsf_launch_job(orte_job_t *jdata)
|
||||
/* have all the daemons we need - launch app */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:lsf: no new daemons to launch",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
goto launch_apps;
|
||||
}
|
||||
|
||||
@ -278,7 +278,7 @@ static int plm_lsf_launch_job(orte_job_t *jdata)
|
||||
cur_prefix = strdup(app_prefix_dir);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:lsf: Set prefix:%s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), cur_prefix));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), cur_prefix));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -322,8 +322,8 @@ static int plm_lsf_launch_job(orte_job_t *jdata)
|
||||
(rc = orte_plm_base_daemon_callback(map->num_new_daemons))) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:lsf: daemon launch failed for job %s on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(active_job), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(active_job), ORTE_ERROR_NAME(rc)));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
@ -333,8 +333,8 @@ launch_apps:
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_launch_apps(active_job))) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:lsf: launch of apps failed for job %s on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(active_job), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(active_job), ORTE_ERROR_NAME(rc)));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
|
@ -184,7 +184,7 @@ static int orte_plm_process_probe(orte_node_t * node, orte_plm_process_shell * s
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:process: going to check SHELL variable on node %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
node->name));
|
||||
|
||||
*shell = ORTE_PLM_RSH_SHELL_UNKNOWN;
|
||||
@ -349,7 +349,7 @@ static int orte_plm_process_probe(orte_node_t * node, orte_plm_process_shell * s
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:process: node:%s has SHELL: %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
node->name, orte_plm_process_shell_name[*shell]));
|
||||
|
||||
return rc;
|
||||
@ -479,8 +479,8 @@ int orte_plm_process_launch(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:process: launching job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid)));
|
||||
|
||||
/* setup the job */
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_setup_job(jdata))) {
|
||||
@ -504,7 +504,7 @@ int orte_plm_process_launch(orte_job_t *jdata)
|
||||
/* have all the daemons we need - launch app */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:process: no new daemons to launch",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
goto launch_apps;
|
||||
}
|
||||
|
||||
@ -570,7 +570,7 @@ int orte_plm_process_launch(orte_job_t *jdata)
|
||||
param = opal_argv_join(argv, ' ');
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:process: final template argv:\n\t%s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(NULL == param) ? "NULL" : param));
|
||||
if (NULL != param) free(param);
|
||||
}
|
||||
@ -632,7 +632,7 @@ int orte_plm_process_launch(orte_job_t *jdata)
|
||||
ORTE_ERROR_LOG(ORTE_ERR_FATAL);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:process:launch daemon failed to be defined on node %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
nodes[nnode]->name));
|
||||
return ORTE_ERR_FATAL;
|
||||
}
|
||||
@ -641,7 +641,7 @@ int orte_plm_process_launch(orte_job_t *jdata)
|
||||
param = opal_argv_join(argv, ' ');
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:process: start daemon as:\n\t%s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(NULL == param) ? "NULL" : param));
|
||||
if (NULL != param) free(param);
|
||||
}
|
||||
@ -650,7 +650,7 @@ int orte_plm_process_launch(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:process: launching on node %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
nodes[nnode]->name));
|
||||
|
||||
exec_argv = &argv[local_exec_index];
|
||||
@ -704,7 +704,7 @@ int orte_plm_process_launch(orte_job_t *jdata)
|
||||
opal_setenv("PATH", newenv, true, &environ);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:process: reset PATH: %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
newenv));
|
||||
free(newenv);
|
||||
|
||||
@ -720,7 +720,7 @@ int orte_plm_process_launch(orte_job_t *jdata)
|
||||
opal_setenv("LD_LIBRARY_PATH", newenv, true, &environ);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:process: reset LD_LIBRARY_PATH: %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
newenv));
|
||||
free(newenv);
|
||||
}
|
||||
@ -744,7 +744,7 @@ int orte_plm_process_launch(orte_job_t *jdata)
|
||||
if (NULL != var) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:process: changing to directory %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
var));
|
||||
/* Ignore errors -- what are we going to do?
|
||||
(and we ignore errors on the remote nodes
|
||||
@ -784,7 +784,7 @@ int orte_plm_process_launch(orte_job_t *jdata)
|
||||
param = opal_argv_join(exec_argv, ' ');
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:process: executing:\n\t%s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(NULL == param) ? "NULL" : param));
|
||||
if (NULL != param) free(param);
|
||||
}
|
||||
@ -827,8 +827,8 @@ int orte_plm_process_launch(orte_job_t *jdata)
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_daemon_callback(map->num_new_daemons))) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:process: launch of apps failed for job %s on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(active_job), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(active_job), ORTE_ERROR_NAME(rc)));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
@ -836,8 +836,8 @@ launch_apps:
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_launch_apps(active_job))) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:process: launch of apps failed for job %s on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(active_job), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(active_job), ORTE_ERROR_NAME(rc)));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
|
@ -170,7 +170,7 @@ int orte_plm_rsh_component_query(mca_base_module_t **module, int *priority)
|
||||
opal_output_verbose(1, orte_plm_globals.output,
|
||||
"%s plm:rsh: unable to be used: cannot find path "
|
||||
"or execution permissions not set for launching agent \"%s\"\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_plm_globals.rsh_agent_argv[0]);
|
||||
*module = NULL;
|
||||
return ORTE_ERROR;
|
||||
@ -186,7 +186,7 @@ int orte_plm_rsh_component_query(mca_base_module_t **module, int *priority)
|
||||
tmp = opal_argv_join(orte_plm_globals.rsh_agent_argv, ' ');
|
||||
opal_output_verbose(1, orte_plm_globals.output,
|
||||
"%s plm:rsh: using \"%s\" for launching\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), tmp);
|
||||
free(tmp);
|
||||
}
|
||||
mca_plm_rsh_component.using_qrsh = true;
|
||||
@ -202,7 +202,7 @@ int orte_plm_rsh_component_query(mca_base_module_t **module, int *priority)
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh: unable to be used: cannot find path "
|
||||
"for launching agent \"%s\"\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_plm_globals.rsh_agent_argv[0]));
|
||||
*module = NULL;
|
||||
return ORTE_ERROR;
|
||||
|
@ -188,21 +188,21 @@ static int orte_plm_rsh_probe(char *nodename,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh: going to check SHELL variable on node %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
nodename));
|
||||
|
||||
*shell = ORTE_PLM_RSH_SHELL_UNKNOWN;
|
||||
if (pipe(fd)) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh: pipe failed with errno=%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
errno));
|
||||
return ORTE_ERR_IN_ERRNO;
|
||||
}
|
||||
if ((pid = fork()) < 0) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh: fork failed with errno=%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
errno));
|
||||
return ORTE_ERR_IN_ERRNO;
|
||||
}
|
||||
@ -210,7 +210,7 @@ static int orte_plm_rsh_probe(char *nodename,
|
||||
if (dup2(fd[1], 1) < 0) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh: dup2 failed with errno=%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
errno));
|
||||
exit(01);
|
||||
}
|
||||
@ -226,7 +226,7 @@ static int orte_plm_rsh_probe(char *nodename,
|
||||
if (close(fd[1])) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh: close failed with errno=%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
errno));
|
||||
return ORTE_ERR_IN_ERRNO;
|
||||
}
|
||||
@ -243,7 +243,7 @@ static int orte_plm_rsh_probe(char *nodename,
|
||||
continue;
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh: Unable to detect the remote shell (error %s)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
strerror(errno)));
|
||||
rc = ORTE_ERR_IN_ERRNO;
|
||||
break;
|
||||
@ -279,7 +279,7 @@ static int orte_plm_rsh_probe(char *nodename,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh: node %s has SHELL: %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
nodename,
|
||||
(ORTE_PLM_RSH_SHELL_UNKNOWN == *shell) ? "UNHANDLED" : (char*)orte_plm_rsh_shell_name[*shell]));
|
||||
|
||||
@ -307,7 +307,7 @@ static void orte_plm_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
|
||||
orte_vpid_t *vpid=(orte_vpid_t*)cbdata;
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s daemon %d failed with status %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(int)*vpid, WEXITSTATUS(status)));
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
opal_dss.pack(&buf, &cnt, 1, ORTE_STD_CNTR);
|
||||
@ -322,7 +322,7 @@ static void orte_plm_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s daemon %d failed with status %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(int)daemon->name.vpid, WEXITSTATUS(status)));
|
||||
/* note that this daemon failed */
|
||||
daemon->state = ORTE_PROC_STATE_FAILED_TO_START;
|
||||
@ -394,7 +394,7 @@ static int setup_shell(orte_plm_rsh_shell_t *rshell,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh: local shell: %d (%s)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
local_shell, orte_plm_rsh_shell_name[local_shell]));
|
||||
|
||||
/* What is our remote shell? */
|
||||
@ -402,7 +402,7 @@ static int setup_shell(orte_plm_rsh_shell_t *rshell,
|
||||
remote_shell = local_shell;
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh: assuming same remote shell as local shell",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
} else {
|
||||
rc = orte_plm_rsh_probe(nodename, &remote_shell);
|
||||
|
||||
@ -419,7 +419,7 @@ static int setup_shell(orte_plm_rsh_shell_t *rshell,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh: remote shell: %d (%s)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
remote_shell, orte_plm_rsh_shell_name[remote_shell]));
|
||||
|
||||
/* Do we need to source .profile on the remote side?
|
||||
@ -700,7 +700,7 @@ static int setup_launch(int *argcptr, char ***argvptr,
|
||||
param = opal_argv_join(argv, ' ');
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh: final template argv:\n\t%s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(NULL == param) ? "NULL" : param));
|
||||
if (NULL != param) free(param);
|
||||
}
|
||||
@ -794,7 +794,7 @@ static void ssh_child(int argc, char **argv,
|
||||
var = opal_argv_join(argv, ' ');
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh: executing: (%s) [%s]",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
exec_path, (NULL == var) ? "NULL" : var));
|
||||
if (NULL != var) free(var);
|
||||
|
||||
@ -826,7 +826,7 @@ static int remote_spawn(opal_buffer_t *launch)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh: remote spawn called",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* extract the prefix from the launch buffer */
|
||||
n = 1;
|
||||
@ -858,7 +858,7 @@ static int remote_spawn(opal_buffer_t *launch)
|
||||
if (opal_list_is_empty(&mca_plm_rsh_component.children)) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh: remote spawn - have no children!",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
failed_launch = false;
|
||||
rc = ORTE_SUCCESS;
|
||||
goto cleanup;
|
||||
@ -885,7 +885,7 @@ static int remote_spawn(opal_buffer_t *launch)
|
||||
/* get the host where this daemon resides */
|
||||
if (NULL == (hostname = orte_ess.proc_get_hostname(&child->name))) {
|
||||
opal_output(0, "%s unable to get hostname for daemon %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_VPID_PRINT(vpid));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), orte_util_print_vpids(vpid));
|
||||
rc = ORTE_ERR_NOT_FOUND;
|
||||
goto cleanup;
|
||||
}
|
||||
@ -905,7 +905,7 @@ static int remote_spawn(opal_buffer_t *launch)
|
||||
if (pid == 0) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh: launching on node %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
hostname));
|
||||
|
||||
/* do the ssh launch - this will exit if it fails */
|
||||
@ -1010,8 +1010,8 @@ int orte_plm_rsh_launch(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh: setting up job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid)));
|
||||
|
||||
/* setup the job */
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_setup_job(jdata))) {
|
||||
@ -1035,7 +1035,7 @@ int orte_plm_rsh_launch(orte_job_t *jdata)
|
||||
/* have all the daemons we need - launch app */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh: no new daemons to launch",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
goto launch_apps;
|
||||
}
|
||||
|
||||
@ -1163,7 +1163,7 @@ launch:
|
||||
if (nodes[nnode]->daemon_launched) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh:launch daemon already exists on node %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
nodes[nnode]->name));
|
||||
goto next_node;
|
||||
}
|
||||
@ -1175,7 +1175,7 @@ launch:
|
||||
ORTE_ERROR_LOG(ORTE_ERR_FATAL);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh:launch daemon failed to be defined on node %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
nodes[nnode]->name));
|
||||
rc = ORTE_ERR_FATAL;
|
||||
goto cleanup;
|
||||
@ -1193,7 +1193,7 @@ launch:
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh: launching on node %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
nodes[nnode]->name));
|
||||
|
||||
/* fork a child to exec the rsh/ssh session */
|
||||
@ -1219,8 +1219,8 @@ launch:
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh: recording launch of daemon %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&nodes[nnode]->daemon->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&nodes[nnode]->daemon->name)));
|
||||
|
||||
OPAL_THREAD_LOCK(&mca_plm_rsh_component.lock);
|
||||
/* This situation can lead to a deadlock if '--debug-daemons' is set.
|
||||
@ -1252,8 +1252,8 @@ next_node:
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_daemon_callback(map->num_new_daemons))) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh: daemon launch failed for job %s on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(active_job), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(active_job), ORTE_ERROR_NAME(rc)));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
@ -1265,8 +1265,8 @@ launch_apps:
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_launch_apps(active_job))) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:rsh: launch of apps failed for job %s on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(active_job), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(active_job), ORTE_ERROR_NAME(rc)));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
@ -1315,8 +1315,8 @@ static int find_children(int rank, int parent, int me, int num_procs)
|
||||
child->name.vpid = peer;
|
||||
OPAL_OUTPUT_VERBOSE((3, orte_plm_globals.output,
|
||||
"%s plm:rsh find-children found child %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&child->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&child->name)));
|
||||
|
||||
opal_list_append(&mca_plm_rsh_component.children, &child->item);
|
||||
}
|
||||
|
@ -107,7 +107,7 @@ static int orte_plm_slurm_component_query(mca_base_module_t **module, int *prior
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurm: available for selection",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
*module = (mca_base_module_t *)&orte_plm_slurm_module;
|
||||
return ORTE_SUCCESS;
|
||||
|
@ -199,8 +199,8 @@ static int plm_slurm_launch_job(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurm: launching job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid)));
|
||||
|
||||
/* setup the job */
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_setup_job(jdata))) {
|
||||
@ -224,7 +224,7 @@ static int plm_slurm_launch_job(orte_job_t *jdata)
|
||||
/* no new daemons required - just launch apps */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurm: no new daemons to launch",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
goto launch_apps;
|
||||
}
|
||||
|
||||
@ -295,7 +295,7 @@ static int plm_slurm_launch_job(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_plm_globals.output,
|
||||
"%s plm:slurm: launching on nodes %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nodelist_flat));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), nodelist_flat));
|
||||
|
||||
/*
|
||||
* ORTED OPTIONS
|
||||
@ -327,7 +327,7 @@ static int plm_slurm_launch_job(orte_job_t *jdata)
|
||||
param = opal_argv_join(argv, ' ');
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurm: final top-level argv:\n\t%s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(NULL == param) ? "NULL" : param));
|
||||
if (NULL != param) free(param);
|
||||
}
|
||||
@ -358,7 +358,7 @@ static int plm_slurm_launch_job(orte_job_t *jdata)
|
||||
cur_prefix = strdup(app_prefix_dir);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurm: Set prefix:%s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
cur_prefix));
|
||||
}
|
||||
}
|
||||
@ -393,8 +393,8 @@ static int plm_slurm_launch_job(orte_job_t *jdata)
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_daemon_callback(map->num_new_daemons))) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurm: daemon launch failed for job %s on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(active_job), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(active_job), ORTE_ERROR_NAME(rc)));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
@ -405,8 +405,8 @@ launch_apps:
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_launch_apps(active_job))) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurm: launch of apps failed for job %s on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(active_job), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(active_job), ORTE_ERROR_NAME(rc)));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
@ -485,7 +485,7 @@ static int plm_slurm_terminate_orteds(void)
|
||||
if (!primary_pid_set) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurm: primary daemons complete!",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
|
||||
jdata->state = ORTE_JOB_STATE_TERMINATED;
|
||||
/* need to set the #terminated value to avoid an incorrect error msg */
|
||||
@ -554,7 +554,7 @@ static void srun_wait_cb(pid_t pid, int status, void* cbdata){
|
||||
/* report that one or more daemons failed to launch so we can exit */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurm: daemon failed during launch",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
orte_plm_base_launch_failed(ORTE_PROC_MY_NAME->jobid, -1, status, ORTE_JOB_STATE_FAILED_TO_START);
|
||||
} else {
|
||||
/* if this is after launch, then we need to abort only if the status
|
||||
@ -566,7 +566,7 @@ static void srun_wait_cb(pid_t pid, int status, void* cbdata){
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurm: daemon failed while running",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
orte_plm_base_launch_failed(ORTE_PROC_MY_NAME->jobid, -1, status, ORTE_JOB_STATE_ABORTED);
|
||||
}
|
||||
/* otherwise, check to see if this is the primary pid */
|
||||
@ -576,7 +576,7 @@ static void srun_wait_cb(pid_t pid, int status, void* cbdata){
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurm: primary daemons complete!",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
|
||||
jdata->state = ORTE_JOB_STATE_TERMINATED;
|
||||
/* need to set the #terminated value to avoid an incorrect error msg */
|
||||
@ -631,7 +631,7 @@ static int plm_slurm_start_proc(int argc, char **argv, char **env,
|
||||
opal_setenv("PATH", newenv, true, &env);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurm: reset PATH: %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
newenv));
|
||||
free(newenv);
|
||||
|
||||
@ -645,7 +645,7 @@ static int plm_slurm_start_proc(int argc, char **argv, char **env,
|
||||
opal_setenv("LD_LIBRARY_PATH", newenv, true, &env);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurm: reset LD_LIBRARY_PATH: %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
newenv));
|
||||
free(newenv);
|
||||
}
|
||||
|
@ -107,7 +107,7 @@ static int orte_plm_slurmd_component_query(mca_base_module_t **module, int *prio
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurmd: available for selection",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
*module = (mca_base_module_t *)&orte_plm_slurmd_module;
|
||||
return ORTE_SUCCESS;
|
||||
|
@ -174,8 +174,8 @@ static int plm_slurmd_launch_job(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurmd: launching job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid)));
|
||||
|
||||
/* setup the job */
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_setup_job(jdata))) {
|
||||
@ -199,7 +199,7 @@ static int plm_slurmd_launch_job(orte_job_t *jdata)
|
||||
/* no new daemons required - just launch apps */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurmd: no new daemons to launch",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
goto launch_apps;
|
||||
}
|
||||
|
||||
@ -270,7 +270,7 @@ static int plm_slurmd_launch_job(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_plm_globals.output,
|
||||
"%s plm:slurmd: launching on nodes %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nodelist_flat));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), nodelist_flat));
|
||||
|
||||
/*
|
||||
* ORTED OPTIONS
|
||||
@ -302,7 +302,7 @@ static int plm_slurmd_launch_job(orte_job_t *jdata)
|
||||
param = opal_argv_join(argv, ' ');
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurmd: final top-level argv:\n\t%s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(NULL == param) ? "NULL" : param));
|
||||
if (NULL != param) free(param);
|
||||
}
|
||||
@ -332,7 +332,7 @@ static int plm_slurmd_launch_job(orte_job_t *jdata)
|
||||
cur_prefix = strdup(app_prefix_dir);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurmd: Set prefix:%s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
cur_prefix));
|
||||
}
|
||||
}
|
||||
@ -365,8 +365,8 @@ static int plm_slurmd_launch_job(orte_job_t *jdata)
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_daemon_callback(map->num_new_daemons))) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurmd: daemon launch failed for job %s on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(active_job), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(active_job), ORTE_ERROR_NAME(rc)));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
@ -379,8 +379,8 @@ launch_apps:
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_launch_apps(active_job))) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurmd: launch of apps failed for job %s on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(active_job), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(active_job), ORTE_ERROR_NAME(rc)));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
@ -459,7 +459,7 @@ static int plm_slurmd_terminate_orteds(void)
|
||||
if (!primary_pid_set) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurmd: primary daemons complete!",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
|
||||
jdata->state = ORTE_JOB_STATE_TERMINATED;
|
||||
/* need to set the #terminated value to avoid an incorrect error msg */
|
||||
@ -528,7 +528,7 @@ static void srun_wait_cb(pid_t pid, int status, void* cbdata){
|
||||
/* report that one or more daemons failed to launch so we can exit */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurmd: daemon failed during launch",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
orte_plm_base_launch_failed(ORTE_PROC_MY_NAME->jobid, -1, status, ORTE_JOB_STATE_FAILED_TO_START);
|
||||
} else {
|
||||
/* if this is after launch, then we need to abort only if the status
|
||||
@ -540,7 +540,7 @@ static void srun_wait_cb(pid_t pid, int status, void* cbdata){
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurmd: daemon failed while running",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
orte_plm_base_launch_failed(ORTE_PROC_MY_NAME->jobid, -1, status, ORTE_JOB_STATE_ABORTED);
|
||||
}
|
||||
/* otherwise, check to see if this is the primary pid */
|
||||
@ -550,7 +550,7 @@ static void srun_wait_cb(pid_t pid, int status, void* cbdata){
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurmd: primary daemons complete!",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
|
||||
jdata->state = ORTE_JOB_STATE_TERMINATED;
|
||||
/* need to set the #terminated value to avoid an incorrect error msg */
|
||||
@ -605,7 +605,7 @@ static int plm_slurmd_start_proc(int argc, char **argv, char **env,
|
||||
opal_setenv("PATH", newenv, true, &env);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurmd: reset PATH: %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
newenv));
|
||||
free(newenv);
|
||||
|
||||
@ -619,7 +619,7 @@ static int plm_slurmd_start_proc(int argc, char **argv, char **env,
|
||||
opal_setenv("LD_LIBRARY_PATH", newenv, true, &env);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:slurmd: reset LD_LIBRARY_PATH: %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
newenv));
|
||||
free(newenv);
|
||||
}
|
||||
|
@ -376,8 +376,8 @@ int orte_plm_submit_launch(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:submit: launching job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid)));
|
||||
|
||||
/* setup the job */
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_setup_job(jdata))) {
|
||||
@ -408,7 +408,7 @@ int orte_plm_submit_launch(orte_job_t *jdata)
|
||||
/* have all the daemons we need - launch app */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:submit: no new daemons to launch",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
goto launch_apps;
|
||||
}
|
||||
|
||||
@ -907,8 +907,8 @@ int orte_plm_submit_launch(orte_job_t *jdata)
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_daemon_callback(map->num_new_daemons))) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:submit: daemon launch failed for job %s on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(active_job), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(active_job), ORTE_ERROR_NAME(rc)));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
@ -916,8 +916,8 @@ launch_apps:
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_launch_apps(active_job))) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:submit: launch of apps failed for job %s on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(active_job), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(active_job), ORTE_ERROR_NAME(rc)));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
|
@ -187,8 +187,8 @@ static int plm_tm_launch_job(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm: launching job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid)));
|
||||
|
||||
/* setup the job */
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_setup_job(jdata))) {
|
||||
@ -236,7 +236,7 @@ static int plm_tm_launch_job(orte_job_t *jdata)
|
||||
param = opal_argv_join(argv, ' ');
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm: final top-level argv:\n\t%s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(NULL == param) ? "NULL" : param));
|
||||
if (NULL != param) free(param);
|
||||
}
|
||||
@ -284,7 +284,7 @@ static int plm_tm_launch_job(orte_job_t *jdata)
|
||||
apps[0]->prefix_dir, bin_base, env[i] + 5);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm: resetting PATH: %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
newenv));
|
||||
opal_setenv("PATH", newenv, true, &env);
|
||||
free(newenv);
|
||||
@ -296,7 +296,7 @@ static int plm_tm_launch_job(orte_job_t *jdata)
|
||||
apps[0]->prefix_dir, lib_base, env[i] + 16);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm: resetting LD_LIBRARY_PATH: %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
newenv));
|
||||
opal_setenv("LD_LIBRARY_PATH", newenv, true, &env);
|
||||
free(newenv);
|
||||
@ -318,7 +318,7 @@ static int plm_tm_launch_job(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm: launching on node %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
node->name));
|
||||
|
||||
/* setup process name */
|
||||
@ -336,7 +336,7 @@ static int plm_tm_launch_job(orte_job_t *jdata)
|
||||
param = opal_argv_join(argv, ' ');
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm: executing:\n\t%s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(NULL == param) ? "NULL" : param));
|
||||
if (NULL != param) free(param);
|
||||
}
|
||||
@ -357,7 +357,7 @@ static int plm_tm_launch_job(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm:launch: finished spawning orteds",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* TM poll for all the spawns */
|
||||
for (i = 0; i < launched; ++i) {
|
||||
@ -375,7 +375,7 @@ static int plm_tm_launch_job(orte_job_t *jdata)
|
||||
if (0 < orte_startup_timeout) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm: setting startup timer for %d milliseconds",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_startup_timeout));
|
||||
ORTE_DETECT_TIMEOUT(&ev, map->num_new_daemons,
|
||||
orte_startup_timeout*1000,
|
||||
@ -386,8 +386,8 @@ static int plm_tm_launch_job(orte_job_t *jdata)
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_daemon_callback(map->num_new_daemons))) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm: daemon launch failed for job %s on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid), ORTE_ERROR_NAME(rc)));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
@ -404,8 +404,8 @@ launch_apps:
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_launch_apps(jdata->jobid))) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm: launch of apps failed for job %s on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid), ORTE_ERROR_NAME(rc)));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
@ -453,7 +453,7 @@ launch_apps:
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm:launch: finished",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
return rc;
|
||||
}
|
||||
@ -556,13 +556,13 @@ static void failed_start(int fd, short dummy, void *arg)
|
||||
{
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm:failed_start",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* if we are aborting, ignore this */
|
||||
if (orte_abnormal_term_ordered) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm:failed_start - abnormal term in progress",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -184,8 +184,8 @@ static int plm_tmd_launch_job(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm: launching job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid)));
|
||||
|
||||
/* setup the job */
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_setup_job(jdata))) {
|
||||
@ -320,7 +320,7 @@ static int plm_tmd_launch_job(orte_job_t *jdata)
|
||||
param = opal_argv_join(argv, ' ');
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm: final top-level argv:\n\t%s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(NULL == param) ? "NULL" : param));
|
||||
if (NULL != param) free(param);
|
||||
}
|
||||
@ -363,7 +363,7 @@ static int plm_tmd_launch_job(orte_job_t *jdata)
|
||||
apps[0]->prefix_dir, bin_base, env[i] + 5);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm: resetting PATH: %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
newenv));
|
||||
opal_setenv("PATH", newenv, true, &env);
|
||||
free(newenv);
|
||||
@ -375,7 +375,7 @@ static int plm_tmd_launch_job(orte_job_t *jdata)
|
||||
apps[0]->prefix_dir, lib_base, env[i] + 16);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm: resetting LD_LIBRARY_PATH: %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
newenv));
|
||||
opal_setenv("LD_LIBRARY_PATH", newenv, true, &env);
|
||||
free(newenv);
|
||||
@ -400,7 +400,7 @@ static int plm_tmd_launch_job(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm: launching on node %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
node->name));
|
||||
|
||||
/* setup process name */
|
||||
@ -418,7 +418,7 @@ static int plm_tmd_launch_job(orte_job_t *jdata)
|
||||
param = opal_argv_join(argv, ' ');
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm: executing:\n\t%s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(NULL == param) ? "NULL" : param));
|
||||
if (NULL != param) free(param);
|
||||
}
|
||||
@ -439,7 +439,7 @@ static int plm_tmd_launch_job(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm:launch: finished spawning orteds",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* setup a timer to give the cmd a chance to be sent */
|
||||
time_is_up = false;
|
||||
@ -497,7 +497,7 @@ static int plm_tmd_launch_job(orte_job_t *jdata)
|
||||
if (0 < orte_startup_timeout) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm: setting startup timer for %d milliseconds",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_startup_timeout));
|
||||
ORTE_DETECT_TIMEOUT(&ev, map->num_new_daemons,
|
||||
orte_startup_timeout*1000,
|
||||
@ -508,8 +508,8 @@ static int plm_tmd_launch_job(orte_job_t *jdata)
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_daemon_callback(map->num_new_daemons))) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm: daemon launch failed for job %s on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid), ORTE_ERROR_NAME(rc)));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
@ -526,8 +526,8 @@ launch_apps:
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_launch_apps(jdata->jobid))) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm: launch of apps failed for job %s on error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid), ORTE_ERROR_NAME(rc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid), ORTE_ERROR_NAME(rc)));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
@ -565,7 +565,7 @@ launch_apps:
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm:launch: finished",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
return rc;
|
||||
}
|
||||
@ -610,7 +610,7 @@ int plm_tmd_terminate_orteds(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm: terminating orteds",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* lookup the daemon job object */
|
||||
if (NULL == (jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
|
||||
@ -643,7 +643,7 @@ int plm_tmd_terminate_orteds(void)
|
||||
while (!time_is_up && 0 < alive) {
|
||||
OPAL_OUTPUT_VERBOSE((10, orte_plm_globals.output,
|
||||
"%s plm:tm: polling for daemon termination",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
rc = tm_poll(TM_NULL_EVENT, &eventpolled, (int)false, &local_err);
|
||||
if (TM_SUCCESS != rc) {
|
||||
errno = local_err;
|
||||
@ -654,7 +654,7 @@ int plm_tmd_terminate_orteds(void)
|
||||
if (eventpolled == TM_NULL_EVENT) {
|
||||
OPAL_OUTPUT_VERBOSE((10, orte_plm_globals.output,
|
||||
"%s plm:tm: got null event",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
/* give system a little time to progress */
|
||||
timer_fired = false;
|
||||
opal_evtimer_add(quicktime, &quicktimeval);
|
||||
@ -668,7 +668,7 @@ int plm_tmd_terminate_orteds(void)
|
||||
if (local_err == TM_ESYSTEM) {
|
||||
OPAL_OUTPUT_VERBOSE((10, orte_plm_globals.output,
|
||||
"%s plm:tm: got TM_ESYSTEM on obit - resubmitting",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
if (ORTE_SUCCESS != (rc = obit_submit(j))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto MOVEON;
|
||||
@ -681,7 +681,7 @@ int plm_tmd_terminate_orteds(void)
|
||||
if (0 != local_err) {
|
||||
OPAL_OUTPUT_VERBOSE((10, orte_plm_globals.output,
|
||||
"%s plm:tm: got error %d on obit for task %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_err, j));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), local_err, j));
|
||||
rc = ORTE_ERROR;
|
||||
goto MOVEON;
|
||||
}
|
||||
@ -690,7 +690,7 @@ int plm_tmd_terminate_orteds(void)
|
||||
*(events_obit+j) = TM_NULL_EVENT;
|
||||
OPAL_OUTPUT_VERBOSE((10, orte_plm_globals.output,
|
||||
"%s plm:tm: task %d exited with status %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j, *(evs+j)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), j, *(evs+j)));
|
||||
/* update the termination status for this daemon */
|
||||
daemons[j+1]->exit_code = *(evs+j);
|
||||
if (0 != daemons[j+1]->exit_code) {
|
||||
@ -818,13 +818,13 @@ static void failed_start(int fd, short dummy, void *arg)
|
||||
{
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm:failed_start",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* if we are aborting, ignore this */
|
||||
if (orte_abnormal_term_ordered) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:tm:failed_start - abnormal term in progress",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -52,7 +52,7 @@ int orte_ras_base_allocate(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
|
||||
"%s ras:base:allocate",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* if we already did this, don't do it again - the pool of
|
||||
* global resources is set.
|
||||
@ -61,7 +61,7 @@ int orte_ras_base_allocate(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
|
||||
"%s ras:base:allocate allocation already read",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* loop through the global node pool and set the
|
||||
* number of allocated slots to the difference
|
||||
@ -127,7 +127,7 @@ int orte_ras_base_allocate(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
|
||||
"%s ras:base:allocate nothing found in module - proceeding to hostfile",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* nothing was found, or no active module was alive. Our next
|
||||
* option is to look for a hostfile and assign our global
|
||||
@ -141,7 +141,7 @@ int orte_ras_base_allocate(orte_job_t *jdata)
|
||||
if (NULL != orte_default_hostfile) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
|
||||
"%s ras:base:allocate parsing default hostfile %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_default_hostfile));
|
||||
|
||||
/* a default hostfile was provided - parse it */
|
||||
@ -191,7 +191,7 @@ int orte_ras_base_allocate(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
|
||||
"%s ras:base:allocate checking hostfile %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
apps[i]->hostfile));
|
||||
|
||||
/* hostfile was specified - parse it and add it to the list */
|
||||
@ -226,7 +226,7 @@ int orte_ras_base_allocate(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
|
||||
"%s ras:base:allocate nothing found in hostfiles - checking dash-host options",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* Our next option is to look for hosts provided via the -host
|
||||
* command line option. If they are present, we declare this
|
||||
@ -277,7 +277,7 @@ int orte_ras_base_allocate(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
|
||||
"%s ras:base:allocate nothing found in dash-host - inserting current node",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* if nothing was found by any of the above methods, then we have no
|
||||
* earthly idea what to do - so just add the local host
|
||||
|
@ -74,7 +74,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
|
||||
"%s ras:base:node_insert inserting %ld nodes",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(long)num_nodes));
|
||||
|
||||
/* set the size of the global array - this helps minimize time
|
||||
@ -99,7 +99,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
|
||||
if (opal_ifislocal(node->name)) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
|
||||
"%s ras:base:node_insert updating HNP info to %ld slots",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(long)node->slots));
|
||||
|
||||
/* flag that hnp has been allocated */
|
||||
@ -139,7 +139,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
|
||||
/* insert the object onto the orte_nodes global array */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
|
||||
"%s ras:base:node_insert node %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(NULL == node->name) ? "NULL" : node->name));
|
||||
/* default allocate all the slots - may be modified later
|
||||
* as a result of filtering actions in mapper
|
||||
|
@ -103,13 +103,13 @@ static int orte_ras_gridengine_component_query(mca_base_module_t **module, int *
|
||||
NULL != getenv("PE_HOSTFILE") && NULL != getenv("JOB_ID")) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
"%s ras:gridengine: available for selection",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
*module = (mca_base_module_t *) &orte_ras_gridengine_module;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
"%s ras:gridengine: NOT available for selection",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
*module = NULL;
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
@ -84,7 +84,7 @@ static int orte_ras_loadleveler_component_query(mca_base_module_t **module, int
|
||||
mca_base_param_lookup_int(param_priority, priority);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
"%s ras:loadleveler: available for selection",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
*module = (mca_base_module_t *) &orte_ras_loadleveler_module;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
@ -92,7 +92,7 @@ static int orte_ras_loadleveler_component_query(mca_base_module_t **module, int
|
||||
/* Sadly, no */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
"%s ras:loadleveler: NOT available for selection",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
*module = NULL;
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
@ -87,7 +87,7 @@ static int orte_ras_slurm_component_query(mca_base_module_t **module, int *prior
|
||||
mca_base_param_lookup_int(param_priority, priority);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
"%s ras:slurm: available for selection",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
*module = (mca_base_module_t *) &orte_ras_slurm_module;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
@ -96,7 +96,7 @@ static int orte_ras_slurm_component_query(mca_base_module_t **module, int *prior
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
"%s ras:slurm: NOT available for selection",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
*module = NULL;
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
@ -116,7 +116,7 @@ static int orte_ras_slurm_allocate(opal_list_t *nodes)
|
||||
if (ORTE_SUCCESS != ret) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
"%s ras:slurm:allocate: discover failed!",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -125,11 +125,11 @@ static int orte_ras_slurm_allocate(opal_list_t *nodes)
|
||||
if (ORTE_SUCCESS == ret) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
"%s ras:slurm:allocate: success",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
} else {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
"%s ras:slurm:allocate: failure (base_allocate_nodes=%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), ret));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@ -142,7 +142,7 @@ static int orte_ras_slurm_finalize(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
"%s ras:slurm:finalize: success (nothing to do)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
@ -180,7 +180,7 @@ static int orte_ras_slurm_discover(char *regexp, char *tasks_per_node,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
"%s ras:slurm:allocate:discover: checking nodelist: %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
regexp));
|
||||
|
||||
do {
|
||||
@ -252,7 +252,7 @@ static int orte_ras_slurm_discover(char *regexp, char *tasks_per_node,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
"%s ras:slurm:allocate:discover: found node %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
base));
|
||||
|
||||
if(ORTE_SUCCESS != (ret = opal_argv_append_nosize(&names, base))) {
|
||||
@ -336,7 +336,7 @@ static int orte_ras_slurm_discover(char *regexp, char *tasks_per_node,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
"%s ras:slurm:allocate:discover: adding node %s (%d slot%s)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
names[i], slots[i], (1 == slots[i]) ? "" : "s"));
|
||||
|
||||
node = OBJ_NEW(orte_node_t);
|
||||
@ -394,7 +394,7 @@ static int orte_ras_slurm_parse_ranges(char *base, char *ranges, char ***names)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
"%s ras:slurm:allocate:discover: parse range %s (2)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
start));
|
||||
|
||||
ret = orte_ras_slurm_parse_range(base, start, names);
|
||||
|
@ -98,7 +98,7 @@ static int finalize(void)
|
||||
{
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
"%s ras:tm:finalize: success (nothing to do)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
@ -150,7 +150,7 @@ static int discover(opal_list_t* nodelist, char *pbs_jobid)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
"%s ras:tm:allocate:discover: got hostname %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostname));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), hostname));
|
||||
|
||||
/* Remember that TM may list the same node more than once. So
|
||||
we have to check for duplicates. */
|
||||
@ -164,7 +164,7 @@ static int discover(opal_list_t* nodelist, char *pbs_jobid)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
"%s ras:tm:allocate:discover: found -- bumped slots to %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->slots));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), node->slots));
|
||||
|
||||
break;
|
||||
}
|
||||
@ -178,7 +178,7 @@ static int discover(opal_list_t* nodelist, char *pbs_jobid)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
"%s ras:tm:allocate:discover: not found -- added to list",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
node = OBJ_NEW(orte_node_t);
|
||||
node->name = hostname;
|
||||
|
@ -218,7 +218,7 @@ int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_node_t *node,
|
||||
/* if we get here, then this node isn't already in the map - add it */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
|
||||
"%s rmaps:base: adding node %s to map",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(NULL == node->name) ? "NULL" : node->name));
|
||||
|
||||
if (ORTE_SUCCESS > (rc = opal_pointer_array_add(map->nodes, (void*)node))) {
|
||||
@ -235,8 +235,8 @@ PROCESS:
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
|
||||
"%s rmaps:base: mapping proc %s to node %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&proc->name),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&proc->name),
|
||||
(NULL == node->name) ? "NULL" : node->name));
|
||||
|
||||
if (0 > (rc = opal_pointer_array_add(node->procs, (void*)proc))) {
|
||||
@ -292,8 +292,8 @@ int orte_rmaps_base_claim_slot(orte_job_t *jdata,
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
|
||||
"%s rmaps:base:claim_slot mapping rank %d to job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
vpid, ORTE_JOBID_PRINT(jdata->jobid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
vpid, orte_util_print_jobids(jdata->jobid)));
|
||||
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs,
|
||||
(int)vpid,
|
||||
(void*)proc))) {
|
||||
@ -361,7 +361,7 @@ int orte_rmaps_base_compute_usage(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
|
||||
"%s rmaps:base:compute_usage",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* point to map */
|
||||
map = jdata->map;
|
||||
@ -438,7 +438,7 @@ int orte_rmaps_base_define_daemons(orte_job_map_t *map)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
|
||||
"%s rmaps:base:define_daemons",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* get the daemon job data struct */
|
||||
if (NULL == (daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
|
||||
@ -474,8 +474,8 @@ int orte_rmaps_base_define_daemons(orte_job_map_t *map)
|
||||
proc->nodename = node->name;
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
|
||||
"%s rmaps:base:define_daemons add new daemon %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&proc->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&proc->name)));
|
||||
/* add the daemon to the daemon job object */
|
||||
if (0 > (rc = opal_pointer_array_add(daemons->procs, (void*)proc))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -498,8 +498,8 @@ int orte_rmaps_base_define_daemons(orte_job_map_t *map)
|
||||
node->daemon_launched = true;
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
|
||||
"%s rmaps:base:define_daemons existing daemon %s already launched",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&node->daemon->name)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&node->daemon->name)));
|
||||
/* count number of daemons being used */
|
||||
++numdaemons;
|
||||
}
|
||||
|
@ -71,8 +71,8 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output,
|
||||
"%s rmaps:seq mapping job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(jdata->jobid)));
|
||||
|
||||
/* conveniece def */
|
||||
map = jdata->map;
|
||||
|
@ -83,7 +83,7 @@ int orte_rml_base_update_contact_info(opal_buffer_t* data)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rml_base_output,
|
||||
"%s rml:base:update:contact:info got uri %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
NULL == rml_uri ? "NULL" : rml_uri));
|
||||
|
||||
if (NULL != rml_uri) {
|
||||
|
@ -92,8 +92,8 @@ static void process_message(int fd, short event, void *data)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rml_base_output,
|
||||
"%s rml:base:recv: processing message from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&mev->sender)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&mev->sender)));
|
||||
|
||||
count = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(mev->buffer, &command, &count, ORTE_RML_CMD))) {
|
||||
@ -119,8 +119,8 @@ static void process_message(int fd, short event, void *data)
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rml_base_output,
|
||||
"%s rml:base:recv: sending ack to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&mev->sender)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&mev->sender)));
|
||||
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
if (0 > (rc = orte_rml.send_buffer(&mev->sender, &buf, ORTE_RML_TAG_UPDATE_ROUTE_ACK, 0))) {
|
||||
|
@ -162,7 +162,7 @@ int orte_rml_ftrm_send(orte_process_name_t* peer,
|
||||
|
||||
opal_output_verbose(20, rml_ftrm_output_handle,
|
||||
"orte_rml_ftrm: send(%s, %d, %d, %d )",
|
||||
ORTE_NAME_PRINT(peer), count, tag, flags);
|
||||
orte_util_print_name_args(peer), count, tag, flags);
|
||||
|
||||
if( NULL != orte_rml_ftrm_wrapped_module.send ) {
|
||||
if( ORTE_SUCCESS != (ret = orte_rml_ftrm_wrapped_module.send(peer, msg, count, tag, flags) ) ) {
|
||||
@ -188,7 +188,7 @@ int orte_rml_ftrm_send_nb(orte_process_name_t* peer,
|
||||
|
||||
opal_output_verbose(20, rml_ftrm_output_handle,
|
||||
"orte_rml_ftrm: send_nb(%s, %d, %d, %d )",
|
||||
ORTE_NAME_PRINT(peer), count, tag, flags);
|
||||
orte_util_print_name_args(peer), count, tag, flags);
|
||||
|
||||
if( NULL != orte_rml_ftrm_wrapped_module.send_nb ) {
|
||||
if( ORTE_SUCCESS != (ret = orte_rml_ftrm_wrapped_module.send_nb(peer, msg, count, tag, flags, cbfunc, cbdata) ) ) {
|
||||
@ -211,7 +211,7 @@ int orte_rml_ftrm_send_buffer(orte_process_name_t* peer,
|
||||
|
||||
opal_output_verbose(20, rml_ftrm_output_handle,
|
||||
"orte_rml_ftrm: send_buffer(%s, %d, %d )",
|
||||
ORTE_NAME_PRINT(peer), tag, flags);
|
||||
orte_util_print_name_args(peer), tag, flags);
|
||||
|
||||
if( NULL != orte_rml_ftrm_wrapped_module.send_buffer ) {
|
||||
if( ORTE_SUCCESS != (ret = orte_rml_ftrm_wrapped_module.send_buffer(peer, buffer, tag, flags) ) ) {
|
||||
@ -236,7 +236,7 @@ int orte_rml_ftrm_send_buffer_nb(orte_process_name_t* peer,
|
||||
|
||||
opal_output_verbose(20, rml_ftrm_output_handle,
|
||||
"orte_rml_ftrm: send_buffer_nb(%s, %d, %d )",
|
||||
ORTE_NAME_PRINT(peer), tag, flags);
|
||||
orte_util_print_name_args(peer), tag, flags);
|
||||
|
||||
if( NULL != orte_rml_ftrm_wrapped_module.send_buffer_nb ) {
|
||||
if( ORTE_SUCCESS != (ret = orte_rml_ftrm_wrapped_module.send_buffer_nb(peer, buffer, tag, flags, cbfunc, cbdata) ) ) {
|
||||
@ -261,7 +261,7 @@ int orte_rml_ftrm_recv(orte_process_name_t* peer,
|
||||
|
||||
opal_output_verbose(20, rml_ftrm_output_handle,
|
||||
"orte_rml_ftrm: recv(%s, %d, %d, %d )",
|
||||
ORTE_NAME_PRINT(peer), count, tag, flags);
|
||||
orte_util_print_name_args(peer), count, tag, flags);
|
||||
|
||||
if( NULL != orte_rml_ftrm_wrapped_module.recv ) {
|
||||
if( ORTE_SUCCESS != (ret = orte_rml_ftrm_wrapped_module.recv(peer, msg, count, tag, flags) ) ) {
|
||||
@ -287,7 +287,7 @@ int orte_rml_ftrm_recv_nb(orte_process_name_t* peer,
|
||||
|
||||
opal_output_verbose(20, rml_ftrm_output_handle,
|
||||
"orte_rml_ftrm: recv_nb(%s, %d, %d, %d )",
|
||||
ORTE_NAME_PRINT(peer), count, tag, flags);
|
||||
orte_util_print_name_args(peer), count, tag, flags);
|
||||
|
||||
if( NULL != orte_rml_ftrm_wrapped_module.recv_nb ) {
|
||||
if( ORTE_SUCCESS != (ret = orte_rml_ftrm_wrapped_module.recv_nb(peer, msg, count, tag, flags, cbfunc, cbdata) ) ) {
|
||||
@ -310,7 +310,7 @@ int orte_rml_ftrm_recv_buffer(orte_process_name_t* peer,
|
||||
|
||||
opal_output_verbose(20, rml_ftrm_output_handle,
|
||||
"orte_rml_ftrm: recv_buffer(%s, %d )",
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
orte_util_print_name_args(peer), tag);
|
||||
|
||||
if( NULL != orte_rml_ftrm_wrapped_module.recv_buffer ) {
|
||||
if( ORTE_SUCCESS != (ret = orte_rml_ftrm_wrapped_module.recv_buffer(peer, buf, tag, flags) ) ) {
|
||||
@ -334,7 +334,7 @@ int orte_rml_ftrm_recv_buffer_nb(orte_process_name_t* peer,
|
||||
|
||||
opal_output_verbose(20, rml_ftrm_output_handle,
|
||||
"orte_rml_ftrm: recv_buffer_nb(%s, %d, %d)",
|
||||
ORTE_NAME_PRINT(peer), tag, flags);
|
||||
orte_util_print_name_args(peer), tag, flags);
|
||||
|
||||
if( NULL != orte_rml_ftrm_wrapped_module.recv_buffer_nb ) {
|
||||
if( ORTE_SUCCESS != (ret = orte_rml_ftrm_wrapped_module.recv_buffer_nb(peer, tag, flags, cbfunc, cbdata) ) ) {
|
||||
|
@ -344,9 +344,9 @@ rml_oob_queued_progress(int fd, short event, void *arg)
|
||||
if (next.vpid == ORTE_VPID_INVALID) {
|
||||
opal_output(0,
|
||||
"%s:queued progress tried routing message from %s to %s:%d, can't find route",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&hdr->origin),
|
||||
ORTE_NAME_PRINT(&hdr->destination),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&hdr->origin),
|
||||
orte_util_print_name_args(&hdr->destination),
|
||||
hdr->tag);
|
||||
opal_backtrace_print(stderr);
|
||||
orte_errmgr.abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);
|
||||
@ -354,9 +354,9 @@ rml_oob_queued_progress(int fd, short event, void *arg)
|
||||
|
||||
if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &next, ORTE_PROC_MY_NAME)) {
|
||||
opal_output(0, "%s:queued progress trying to get message from %s to %s:%d, routing loop",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&hdr->origin),
|
||||
ORTE_NAME_PRINT(&hdr->destination),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&hdr->origin),
|
||||
orte_util_print_name_args(&hdr->destination),
|
||||
hdr->tag);
|
||||
opal_backtrace_print(stderr);
|
||||
orte_errmgr.abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);
|
||||
@ -370,10 +370,10 @@ rml_oob_queued_progress(int fd, short event, void *arg)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_output,
|
||||
"%s routing message from %s for %s to %s (tag: %d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&hdr->origin),
|
||||
ORTE_NAME_PRINT(&hdr->destination),
|
||||
ORTE_NAME_PRINT(&next),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&hdr->origin),
|
||||
orte_util_print_name_args(&hdr->destination),
|
||||
orte_util_print_name_args(&next),
|
||||
hdr->tag));
|
||||
|
||||
ORTE_RML_OOB_MSG_HEADER_HTON(*hdr);
|
||||
@ -402,9 +402,9 @@ rml_oob_queued_progress(int fd, short event, void *arg)
|
||||
} else {
|
||||
opal_output(0,
|
||||
"%s failed to send message from %s to %s:%d %s (rc = %d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&next),
|
||||
ORTE_NAME_PRINT(&origin),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&next),
|
||||
orte_util_print_name_args(&origin),
|
||||
real_tag,
|
||||
ORTE_ERROR_NAME(ret),
|
||||
ret);
|
||||
@ -449,9 +449,9 @@ rml_oob_recv_route_callback(int status,
|
||||
next = orte_routed.get_route(&hdr->destination);
|
||||
if (next.vpid == ORTE_VPID_INVALID) {
|
||||
opal_output(0, "%s:route_callback tried routing message from %s to %s:%d, can't find route",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&origin),
|
||||
ORTE_NAME_PRINT(&hdr->destination),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&origin),
|
||||
orte_util_print_name_args(&hdr->destination),
|
||||
hdr->tag);
|
||||
opal_backtrace_print(stderr);
|
||||
orte_errmgr.abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);
|
||||
@ -459,9 +459,9 @@ rml_oob_recv_route_callback(int status,
|
||||
|
||||
if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &next, ORTE_PROC_MY_NAME)) {
|
||||
opal_output(0, "%s:route_callback trying to get message from %s to %s:%d, routing loop",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&origin),
|
||||
ORTE_NAME_PRINT(&hdr->destination),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&origin),
|
||||
orte_util_print_name_args(&hdr->destination),
|
||||
hdr->tag);
|
||||
opal_backtrace_print(stderr);
|
||||
orte_errmgr.abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);
|
||||
@ -475,10 +475,10 @@ rml_oob_recv_route_callback(int status,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_output,
|
||||
"%s routing message from %s for %s to %s (tag: %d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&hdr->origin),
|
||||
ORTE_NAME_PRINT(&hdr->destination),
|
||||
ORTE_NAME_PRINT(&next),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&hdr->origin),
|
||||
orte_util_print_name_args(&hdr->destination),
|
||||
orte_util_print_name_args(&next),
|
||||
hdr->tag));
|
||||
|
||||
ORTE_RML_OOB_MSG_HEADER_HTON(*hdr);
|
||||
@ -498,8 +498,8 @@ rml_oob_recv_route_callback(int status,
|
||||
orte_rml_oob_queued_msg_t *qmsg = OBJ_NEW(orte_rml_oob_queued_msg_t);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_output,
|
||||
"%s: no OOB information for %s. Queuing for later.",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&next)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&next)));
|
||||
ORTE_RML_OOB_MSG_HEADER_NTOH(*hdr);
|
||||
qmsg->payload[0].iov_base = (IOVBASE_TYPE*) malloc(iov[0].iov_len);
|
||||
if (NULL == qmsg->payload[0].iov_base) abort();
|
||||
@ -516,8 +516,8 @@ rml_oob_recv_route_callback(int status,
|
||||
} else {
|
||||
opal_output(0,
|
||||
"%s failed to send message to %s: %s (rc = %d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&next),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&next),
|
||||
opal_strerror(ret),
|
||||
ret);
|
||||
orte_errmgr.abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);
|
||||
|
@ -34,9 +34,9 @@ orte_rml_recv_msg_callback(int status,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_output,
|
||||
"%s recv from %s for %s (tag %d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&hdr->origin),
|
||||
ORTE_NAME_PRINT(&hdr->destination),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&hdr->origin),
|
||||
orte_util_print_name_args(&hdr->destination),
|
||||
hdr->tag));
|
||||
|
||||
if (msg->msg_type == ORTE_RML_BLOCKING_RECV) {
|
||||
|
@ -103,7 +103,7 @@ orte_rml_oob_send(orte_process_name_t* peer,
|
||||
next = orte_routed.get_route(peer);
|
||||
if (next.vpid == ORTE_VPID_INVALID) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_ADDRESSEE_UNKNOWN);
|
||||
opal_output(0, "%s could not get route to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer));
|
||||
opal_output(0, "%s could not get route to %s", orte_util_print_name_args(ORTE_PROC_MY_NAME), orte_util_print_name_args(peer));
|
||||
return ORTE_ERR_ADDRESSEE_UNKNOWN;
|
||||
}
|
||||
msg->msg_data = (struct iovec *) malloc(sizeof(struct iovec) * (count + 1));
|
||||
@ -129,9 +129,9 @@ orte_rml_oob_send(orte_process_name_t* peer,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_output,
|
||||
"rml_send %s -> %s (router %s, tag %d, %d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer),
|
||||
ORTE_NAME_PRINT(&next),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(peer),
|
||||
orte_util_print_name_args(&next),
|
||||
tag,
|
||||
real_tag));
|
||||
ret = orte_rml_oob_module.active_oob->oob_send_nb(&next,
|
||||
@ -144,8 +144,8 @@ orte_rml_oob_send(orte_process_name_t* peer,
|
||||
msg);
|
||||
if (ret < 0) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
opal_output(0, "%s attempted to send to %s: tag %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&next), (int)real_tag);
|
||||
opal_output(0, "%s attempted to send to %s: tag %d", orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&next), (int)real_tag);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
@ -192,7 +192,7 @@ orte_rml_oob_send_nb(orte_process_name_t* peer,
|
||||
next = orte_routed.get_route(peer);
|
||||
if (next.vpid == ORTE_VPID_INVALID) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_ADDRESSEE_UNKNOWN);
|
||||
opal_output(0, "%s could not get route to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer));
|
||||
opal_output(0, "%s could not get route to %s", orte_util_print_name_args(ORTE_PROC_MY_NAME), orte_util_print_name_args(peer));
|
||||
return ORTE_ERR_ADDRESSEE_UNKNOWN;
|
||||
}
|
||||
|
||||
@ -220,9 +220,9 @@ orte_rml_oob_send_nb(orte_process_name_t* peer,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_output,
|
||||
"rml_send_nb %s -> %s (router %s, tag %d, %d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer),
|
||||
ORTE_NAME_PRINT(&next),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(peer),
|
||||
orte_util_print_name_args(&next),
|
||||
tag, real_tag));
|
||||
ret = orte_rml_oob_module.active_oob->oob_send_nb(&next,
|
||||
ORTE_PROC_MY_NAME,
|
||||
@ -234,8 +234,8 @@ orte_rml_oob_send_nb(orte_process_name_t* peer,
|
||||
msg);
|
||||
if (ret < 0) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
opal_output(0, "%s attempted to send to %s: tag %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&next), (int)real_tag);
|
||||
opal_output(0, "%s attempted to send to %s: tag %d", orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&next), (int)real_tag);
|
||||
OBJ_RELEASE(msg);
|
||||
}
|
||||
|
||||
@ -313,8 +313,8 @@ orte_rml_oob_send_buffer_nb(orte_process_name_t* peer,
|
||||
if (next.vpid == ORTE_VPID_INVALID) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_ADDRESSEE_UNKNOWN);
|
||||
opal_output(0, "%s unable to find address for %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(peer));
|
||||
return ORTE_ERR_ADDRESSEE_UNKNOWN;
|
||||
}
|
||||
|
||||
@ -342,9 +342,9 @@ orte_rml_oob_send_buffer_nb(orte_process_name_t* peer,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_output,
|
||||
"rml_send_buffer_nb %s -> %s (router %s, tag %d, %d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer),
|
||||
ORTE_NAME_PRINT(&next),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(peer),
|
||||
orte_util_print_name_args(&next),
|
||||
tag, real_tag));
|
||||
|
||||
ret = orte_rml_oob_module.active_oob->oob_send_nb(&next,
|
||||
@ -358,8 +358,8 @@ orte_rml_oob_send_buffer_nb(orte_process_name_t* peer,
|
||||
|
||||
if (ret < 0) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
opal_output(0, "%s attempted to send to %s: tag %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&next), (int)real_tag);
|
||||
opal_output(0, "%s attempted to send to %s: tag %d", orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(&next), (int)real_tag);
|
||||
OBJ_RELEASE(msg);
|
||||
OBJ_RELEASE(buffer);
|
||||
}
|
||||
|
@ -63,7 +63,7 @@ int orte_routed_base_comm_start(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output,
|
||||
"%s routed:base: Receive: Start command recv",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
|
||||
ORTE_RML_TAG_INIT_ROUTES,
|
||||
@ -89,7 +89,7 @@ int orte_routed_base_comm_stop(void)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output,
|
||||
"%s routed:base:receive stop comm",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_INIT_ROUTES))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -138,8 +138,8 @@ void orte_routed_base_recv(int status, orte_process_name_t* sender,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output,
|
||||
"%s routed:base:receive got message from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(sender)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(sender)));
|
||||
|
||||
/* don't process this right away - we need to get out of the recv before
|
||||
* we process the message as it may ask us to do something that involves
|
||||
|
@ -163,8 +163,8 @@ static int delete_route(orte_process_name_t *proc)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_binomial_delete_route for %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
|
||||
|
||||
/* if this is from a different job family, then I need to
|
||||
@ -238,9 +238,9 @@ static int update_route(orte_process_name_t *target,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_binomial_update: %s --> %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(target),
|
||||
ORTE_NAME_PRINT(route)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(target),
|
||||
orte_util_print_name_args(route)));
|
||||
|
||||
|
||||
/* if this is from a different job family, then I need to
|
||||
@ -258,9 +258,9 @@ static int update_route(orte_process_name_t *target,
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_binomial_update: diff job family routing job %s --> %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(target->jobid),
|
||||
ORTE_NAME_PRINT(route)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(target->jobid),
|
||||
orte_util_print_name_args(route)));
|
||||
|
||||
/* see if this target is already present - it will have a wildcard vpid,
|
||||
* so we have to look for it with that condition
|
||||
@ -294,7 +294,7 @@ static int update_route(orte_process_name_t *target,
|
||||
|
||||
/* THIS CAME FROM OUR OWN JOB FAMILY... */
|
||||
|
||||
opal_output(0, "%s CALL TO UPDATE ROUTE FOR OWN JOB FAMILY", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
opal_output(0, "%s CALL TO UPDATE ROUTE FOR OWN JOB FAMILY", orte_util_print_name_args(ORTE_PROC_MY_NAME));
|
||||
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
@ -363,7 +363,7 @@ static orte_process_name_t get_route(orte_process_name_t *target)
|
||||
ORTE_PROC_MY_HNP->vpid == target->vpid) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
|
||||
"%s routing not enabled - going direct",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
ret = target;
|
||||
goto found;
|
||||
}
|
||||
@ -387,9 +387,9 @@ static orte_process_name_t get_route(orte_process_name_t *target)
|
||||
found:
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
|
||||
"%s routed_binomial_get(%s) --> %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(target),
|
||||
ORTE_NAME_PRINT(ret)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(target),
|
||||
orte_util_print_name_args(ret)));
|
||||
|
||||
return *ret;
|
||||
}
|
||||
@ -416,7 +416,7 @@ static int process_callback(orte_jobid_t job, opal_buffer_t *buffer)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
|
||||
"%s routed_binomial:callback got uri %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
(NULL == rml_uri) ? "NULL" : rml_uri));
|
||||
|
||||
if (rml_uri == NULL) continue;
|
||||
@ -510,8 +510,8 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_binomial: init routes for daemon job %s\n\thnp_uri %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(job),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(job),
|
||||
(NULL == orte_proc_info.my_hnp_uri) ? "NULL" : orte_proc_info.my_hnp_uri));
|
||||
|
||||
if (NULL == ndat) {
|
||||
@ -555,7 +555,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
|
||||
"%s routed_binomial: completed init routes",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
@ -565,8 +565,8 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_binomial: init routes for HNP job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(job)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_jobids(job)));
|
||||
|
||||
if (NULL == ndat) {
|
||||
/* if ndat is NULL, then this is being called during init, so just
|
||||
@ -615,7 +615,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_binomial: init routes w/non-NULL data",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* if this is for a job family of zero, then we know that the enclosed
|
||||
* procs are local slaves to our daemon. In that case, we can just ignore this
|
||||
@ -633,8 +633,8 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_binomial_init_routes: diff job family - sending update to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_HNP)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_HNP)));
|
||||
|
||||
if (0 > (rc = orte_rml.send_buffer(ORTE_PROC_MY_HNP, ndat,
|
||||
ORTE_RML_TAG_RML_INFO_UPDATE, 0))) {
|
||||
@ -653,7 +653,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_binomial_init_routes: ack recvd",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* our get_route function automatically routes all messages for
|
||||
* other job families via the HNP, so nothing more to do here
|
||||
@ -668,7 +668,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_binomial: init routes for proc job %s\n\thnp_uri %s\n\tdaemon uri %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job),
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME), orte_util_print_jobids(job),
|
||||
(NULL == orte_proc_info.my_hnp_uri) ? "NULL" : orte_proc_info.my_hnp_uri,
|
||||
(NULL == orte_proc_info.my_daemon_uri) ? "NULL" : orte_proc_info.my_daemon_uri));
|
||||
|
||||
@ -677,13 +677,13 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
* we didn't get it, then error out
|
||||
*/
|
||||
opal_output(0, "%s ERROR: Failed to identify the local daemon's URI",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME));
|
||||
opal_output(0, "%s ERROR: This is a fatal condition when the binomial router",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME));
|
||||
opal_output(0, "%s ERROR: has been selected - either select the unity router",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME));
|
||||
opal_output(0, "%s ERROR: or ensure that the local daemon info is provided",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME));
|
||||
return ORTE_ERR_FATAL;
|
||||
}
|
||||
|
||||
@ -751,8 +751,8 @@ static int route_lost(const orte_process_name_t *route)
|
||||
NULL != lifeline &&
|
||||
OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, route, lifeline)) {
|
||||
opal_output(0, "%s routed:binomial: Connection to lifeline %s lost",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(lifeline));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(lifeline));
|
||||
return ORTE_ERR_FATAL;
|
||||
}
|
||||
|
||||
@ -804,8 +804,8 @@ static int binomial_tree(int rank, int parent, int me, int num_procs,
|
||||
child->vpid = peer;
|
||||
OPAL_OUTPUT_VERBOSE((3, orte_routed_base_output,
|
||||
"%s routed:binomial found child %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_VPID_PRINT(child->vpid)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_vpids(child->vpid)));
|
||||
if (NULL != childrn) {
|
||||
/* this is a direct child - add it to my list */
|
||||
opal_list_append(childrn, &child->super);
|
||||
@ -872,15 +872,15 @@ static int update_routing_tree(void)
|
||||
&num_children, &my_children, NULL);
|
||||
|
||||
if (0 < opal_output_get_verbosity(orte_routed_base_output)) {
|
||||
opal_output(0, "%s: parent %d num_children %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), my_parent.vpid, num_children);
|
||||
opal_output(0, "%s: parent %d num_children %d", orte_util_print_name_args(ORTE_PROC_MY_NAME), my_parent.vpid, num_children);
|
||||
for (item = opal_list_get_first(&my_children);
|
||||
item != opal_list_get_end(&my_children);
|
||||
item = opal_list_get_next(item)) {
|
||||
child = (orte_routed_tree_t*)item;
|
||||
opal_output(0, "%s: \tchild %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), child->vpid);
|
||||
opal_output(0, "%s: \tchild %d", orte_util_print_name_args(ORTE_PROC_MY_NAME), child->vpid);
|
||||
for (j=0; j < (int)orte_proc_info.num_procs; j++) {
|
||||
if (opal_bitmap_is_set_bit(&child->relatives, j)) {
|
||||
opal_output(0, "%s: \t\trelation %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j);
|
||||
opal_output(0, "%s: \t\trelation %d", orte_util_print_name_args(ORTE_PROC_MY_NAME), j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -90,8 +90,8 @@ static int delete_route(orte_process_name_t *proc)
|
||||
{
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_direct_delete_route for %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(proc)));
|
||||
|
||||
/*There is nothing to do here */
|
||||
|
||||
@ -103,9 +103,9 @@ static int update_route(orte_process_name_t *target,
|
||||
{
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_direct_update: %s --> %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(target),
|
||||
ORTE_NAME_PRINT(route)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(target),
|
||||
orte_util_print_name_args(route)));
|
||||
|
||||
/*There is nothing to do here */
|
||||
|
||||
@ -127,9 +127,9 @@ static orte_process_name_t get_route(orte_process_name_t *target)
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
|
||||
"%s routed_direct_get(%s) --> %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(target),
|
||||
ORTE_NAME_PRINT(ret)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME),
|
||||
orte_util_print_name_args(target),
|
||||
orte_util_print_name_args(ret)));
|
||||
|
||||
return *ret;
|
||||
}
|
||||
@ -153,7 +153,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_direct: init routes w/non-NULL data",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
orte_util_print_name_args(ORTE_PROC_MY_NAME)));
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(ndat))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов. Показать больше
Загрузка…
x
Ссылка в новой задаче
Block a user