Fix a couple of problems with the way we were using orte_process_name_t
structures in the system. Instead of comparing names with memcmp(), use the ns comparison function (orte_ns.compare_fields). Using memcmp() does not cause a problem as long as all three elements of the name are ints, but if the elements have different sizes, alignment and padding rules can cause memcmp() to compare the padding space, which rarely holds a sane value. This commit was SVN r14998.
Этот коммит содержится в:
родитель
1d11cc4b2d
Коммит
27ad954265
@ -469,7 +469,7 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en
|
||||
}
|
||||
ORTE_PROCESS_NAME_NTOH(guid);
|
||||
/* compare this to the expected values */
|
||||
if(memcmp(&btl_proc->proc_name, &guid, sizeof(orte_process_name_t)) != 0) {
|
||||
if (0 != orte_ns.compare_fields(ORTE_NS_CMP_ALL, &btl_proc->proc_name, &guid)) {
|
||||
BTL_ERROR(("received unexpected process identifier [%lu,%lu,%lu]",
|
||||
ORTE_NAME_ARGS(&guid)));
|
||||
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||
|
@ -24,7 +24,8 @@
|
||||
#include "orte/orte_constants.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "orte/class/orte_proc_table.h"
|
||||
|
||||
#include "orte/mca/ns/ns.h"
|
||||
#include "orte/mca/ns/ns_types.h"
|
||||
|
||||
/*
|
||||
* orte_process_name_hash_node_t
|
||||
@ -44,11 +45,13 @@ static OBJ_CLASS_INSTANCE(
|
||||
NULL,
|
||||
NULL);
|
||||
|
||||
#define GET_KEY(proc) \
|
||||
( (((uint32_t) proc->cellid) << 24) + (((uint32_t) proc->jobid) << 16) + ((uint32_t) proc->vpid) )
|
||||
|
||||
void* orte_hash_table_get_proc(opal_hash_table_t* ht,
|
||||
const orte_process_name_t* proc)
|
||||
{
|
||||
uint32_t key = (proc->cellid << 24) + (proc->jobid << 16) + proc->vpid;
|
||||
uint32_t key = GET_KEY(proc);
|
||||
opal_list_t* list = ht->ht_table + (key & ht->ht_mask);
|
||||
orte_proc_hash_node_t *node;
|
||||
|
||||
@ -62,7 +65,7 @@ void* orte_hash_table_get_proc(opal_hash_table_t* ht,
|
||||
for(node = (orte_proc_hash_node_t*)opal_list_get_first(list);
|
||||
node != (orte_proc_hash_node_t*)opal_list_get_end(list);
|
||||
node = (orte_proc_hash_node_t*)opal_list_get_next(node)) {
|
||||
if (memcmp(&node->hn_key,proc,sizeof(orte_process_name_t)) == 0) {
|
||||
if (0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &node->hn_key, proc)) {
|
||||
return node->hn_value;
|
||||
}
|
||||
}
|
||||
@ -75,7 +78,7 @@ int orte_hash_table_set_proc(
|
||||
const orte_process_name_t* proc,
|
||||
void* value)
|
||||
{
|
||||
uint32_t key = (proc->cellid << 24) + (proc->jobid << 16) + proc->vpid;
|
||||
uint32_t key = GET_KEY(proc);
|
||||
opal_list_t* list = ht->ht_table + (key & ht->ht_mask);
|
||||
orte_proc_hash_node_t *node;
|
||||
|
||||
@ -89,7 +92,7 @@ int orte_hash_table_set_proc(
|
||||
for(node = (orte_proc_hash_node_t*)opal_list_get_first(list);
|
||||
node != (orte_proc_hash_node_t*)opal_list_get_end(list);
|
||||
node = (orte_proc_hash_node_t*)opal_list_get_next(node)) {
|
||||
if (memcmp(&node->hn_key,proc,sizeof(orte_process_name_t)) == 0) {
|
||||
if (0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &node->hn_key, proc)) {
|
||||
node->hn_value = value;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
@ -113,7 +116,7 @@ int orte_hash_table_remove_proc(
|
||||
opal_hash_table_t* ht,
|
||||
const orte_process_name_t* proc)
|
||||
{
|
||||
uint32_t key = (proc->cellid << 24) + (proc->jobid << 16) + proc->vpid;
|
||||
uint32_t key = GET_KEY(proc);
|
||||
opal_list_t* list = ht->ht_table + (key & ht->ht_mask);
|
||||
orte_proc_hash_node_t *node;
|
||||
|
||||
@ -127,7 +130,7 @@ int orte_hash_table_remove_proc(
|
||||
for(node = (orte_proc_hash_node_t*)opal_list_get_first(list);
|
||||
node != (orte_proc_hash_node_t*)opal_list_get_end(list);
|
||||
node = (orte_proc_hash_node_t*)opal_list_get_next(node)) {
|
||||
if (memcmp(&node->hn_key,proc,sizeof(orte_process_name_t)) == 0) {
|
||||
if (0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &node->hn_key, proc)) {
|
||||
opal_list_remove_item(list, (opal_list_item_t*)node);
|
||||
opal_list_append(&ht->ht_nodes, (opal_list_item_t*)node);
|
||||
ht->ht_size--;
|
||||
|
@ -118,10 +118,10 @@ typedef struct orte_process_name_t orte_process_name_t;
|
||||
/*
|
||||
* define invalid values
|
||||
*/
|
||||
#define ORTE_CELLID_INVALID -999
|
||||
#define ORTE_JOBID_INVALID -999
|
||||
#define ORTE_VPID_INVALID -999
|
||||
#define ORTE_NODEID_INVALID -999
|
||||
#define ORTE_CELLID_INVALID (ORTE_CELLID_MIN + 1)
|
||||
#define ORTE_JOBID_INVALID (ORTE_JOBID_MIN + 1)
|
||||
#define ORTE_VPID_INVALID (ORTE_VPID_MIN + 1)
|
||||
#define ORTE_NODEID_INVALID (ORTE_NODEID_MIN + 1)
|
||||
|
||||
/*
|
||||
* define wildcard values (should be -1)
|
||||
@ -152,8 +152,8 @@ ORTE_DECLSPEC extern orte_process_name_t orte_ns_name_my_hnp; /** instantiated
|
||||
* @param name
|
||||
*/
|
||||
#define ORTE_PROCESS_NAME_HTON(n) \
|
||||
n.cellid = htonl(n.cellid); \
|
||||
n.jobid = htonl(n.jobid); \
|
||||
n.cellid = htonl(n.cellid); \
|
||||
n.jobid = htonl(n.jobid); \
|
||||
n.vpid = htonl(n.vpid);
|
||||
|
||||
/**
|
||||
@ -161,9 +161,9 @@ ORTE_DECLSPEC extern orte_process_name_t orte_ns_name_my_hnp; /** instantiated
|
||||
*
|
||||
* @param name
|
||||
*/
|
||||
#define ORTE_PROCESS_NAME_NTOH(n) \
|
||||
n.cellid = ntohl(n.cellid); \
|
||||
n.jobid = ntohl(n.jobid); \
|
||||
#define ORTE_PROCESS_NAME_NTOH(n) \
|
||||
n.cellid = ntohl(n.cellid); \
|
||||
n.jobid = ntohl(n.jobid); \
|
||||
n.vpid = ntohl(n.vpid);
|
||||
|
||||
|
||||
|
@ -230,7 +230,7 @@ mca_oob_tcp_peer_t * mca_oob_tcp_peer_lookup(const orte_process_name_t* name)
|
||||
OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_lock);
|
||||
peer = (mca_oob_tcp_peer_t*)orte_hash_table_get_proc(
|
||||
&mca_oob_tcp_component.tcp_peers, name);
|
||||
if(NULL != peer && memcmp(&peer->peer_name,name,sizeof(peer->peer_name)) == 0) {
|
||||
if (NULL != peer && 0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &peer->peer_name, name)) {
|
||||
OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
|
||||
return peer;
|
||||
}
|
||||
@ -690,7 +690,7 @@ void mca_oob_tcp_peer_close(mca_oob_tcp_peer_t* peer)
|
||||
#endif
|
||||
|
||||
/* if we lose the connection to the seed - abort */
|
||||
if(memcmp(&peer->peer_name,ORTE_PROC_MY_HNP,sizeof(orte_process_name_t)) == 0) {
|
||||
if (0 == orte_ns.compare_fields(ORTE_NS_CMP_ALL, &peer->peer_name, ORTE_PROC_MY_HNP)) {
|
||||
/* If we are not already inside orte_finalize, then call abort */
|
||||
if (ORTE_UNIVERSE_STATE_FINALIZE > orte_universe_info.state) {
|
||||
/* Should free the peer lock before we abort so we don't
|
||||
@ -826,7 +826,7 @@ static int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* peer, int sd)
|
||||
}
|
||||
|
||||
/* compare the peers name to the expected value */
|
||||
if(memcmp(&peer->peer_name, &hdr.msg_src, sizeof(orte_process_name_t)) != 0) {
|
||||
if (0 != orte_ns.compare_fields(ORTE_NS_CMP_ALL, &peer->peer_name, &hdr.msg_src)) {
|
||||
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_recv_connect_ack: "
|
||||
"received unexpected process identifier [%ld,%ld,%ld]\n",
|
||||
ORTE_NAME_ARGS(orte_process_info.my_name),
|
||||
|
@ -46,7 +46,7 @@ int mca_oob_tcp_recv(
|
||||
int i, rc = 0, size = 0;
|
||||
|
||||
if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_ALL) {
|
||||
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_recv: tag %d\n",
|
||||
opal_output(0, "[%lu,%lu,%lu]-[%ld,%ld,%ld] mca_oob_tcp_recv: tag %d\n",
|
||||
ORTE_NAME_ARGS(orte_process_info.my_name),
|
||||
ORTE_NAME_ARGS(peer),
|
||||
tag);
|
||||
@ -65,7 +65,7 @@ int mca_oob_tcp_recv(
|
||||
}
|
||||
|
||||
if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_INFO) {
|
||||
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_recv*unexpected*: tag %d size %lu\n",
|
||||
opal_output(0, "[%lu,%lu,%lu]-[%ld,%ld,%ld] mca_oob_tcp_recv*unexpected*: tag %d size %lu\n",
|
||||
ORTE_NAME_ARGS(orte_process_info.my_name),
|
||||
ORTE_NAME_ARGS(peer),
|
||||
tag, (unsigned long)(msg->msg_hdr.msg_size) );
|
||||
@ -117,7 +117,7 @@ int mca_oob_tcp_recv(
|
||||
}
|
||||
|
||||
if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_INFO) {
|
||||
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_recv*expected*: tag %d size %lu\n",
|
||||
opal_output(0, "[%lu,%lu,%lu]-[%ld,%ld,%ld] mca_oob_tcp_recv*expected*: tag %d size %lu\n",
|
||||
ORTE_NAME_ARGS(orte_process_info.my_name),
|
||||
ORTE_NAME_ARGS(peer),
|
||||
tag, (unsigned long)(size) );
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user