From d542c9ff2dc57ca5d260d0578fd5c1c556c598c7 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Thu, 6 Nov 2014 15:00:08 +0900 Subject: [PATCH 1/3] btl/openib: send openib modex with the PMIX_GLOBAL flag --- opal/mca/btl/openib/btl_openib_component.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opal/mca/btl/openib/btl_openib_component.c b/opal/mca/btl/openib/btl_openib_component.c index d876e215d5..4b096643ba 100644 --- a/opal/mca/btl/openib/btl_openib_component.c +++ b/opal/mca/btl/openib/btl_openib_component.c @@ -445,7 +445,7 @@ static int btl_openib_modex_send(void) } /* All done -- send it! */ - OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_REMOTE, + OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_GLOBAL, &mca_btl_openib_component.super.btl_version, message, msg_size); free(message); From 54ddb0aece0892dcdb1a1293a3bd3902b5f3acdc Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Thu, 6 Nov 2014 01:39:25 -0500 Subject: [PATCH 2/3] Don't release the provided opal_proc in the error path. --- opal/mca/btl/tcp/btl_tcp.c | 2 -- opal/mca/btl/tcp/btl_tcp_proc.c | 20 ++++++++++++-------- opal/mca/btl/tcp/btl_tcp_proc.h | 4 ++-- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/opal/mca/btl/tcp/btl_tcp.c b/opal/mca/btl/tcp/btl_tcp.c index 6e7e2f4e4c..0b41afa7b5 100644 --- a/opal/mca/btl/tcp/btl_tcp.c +++ b/opal/mca/btl/tcp/btl_tcp.c @@ -98,7 +98,6 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl, tcp_endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t); if(NULL == tcp_endpoint) { OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock); - OBJ_RELEASE(opal_proc); return OPAL_ERR_OUT_OF_RESOURCE; } @@ -106,7 +105,6 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl, rc = mca_btl_tcp_proc_insert(tcp_proc, tcp_endpoint); if(rc != OPAL_SUCCESS) { OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock); - OBJ_RELEASE(opal_proc); OBJ_RELEASE(tcp_endpoint); continue; } diff --git a/opal/mca/btl/tcp/btl_tcp_proc.c b/opal/mca/btl/tcp/btl_tcp_proc.c index 4ae02169b0..b7a49f65bc 100644 --- a/opal/mca/btl/tcp/btl_tcp_proc.c +++ b/opal/mca/btl/tcp/btl_tcp_proc.c @@ -76,12 +76,15 @@ void mca_btl_tcp_proc_construct(mca_btl_tcp_proc_t* tcp_proc) void mca_btl_tcp_proc_destruct(mca_btl_tcp_proc_t* tcp_proc) { - /* remove from list of all proc instances */ - OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock); - opal_hash_table_remove_value_uint64(&mca_btl_tcp_component.tcp_procs, - tcp_proc->proc_opal->proc_name); - OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock); - + if( NULL != tcp_proc->proc_opal ) { + /* remove from list of all proc instances */ + OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock); + opal_hash_table_remove_value_uint64(&mca_btl_tcp_component.tcp_procs, + tcp_proc->proc_opal->proc_name); + OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock); + /* Do not OBJ_RELEASE the proc_opal ! */ + /* OBJ_RELEASE(tcp_proc->proc_opal); */ + } /* release resources */ if(NULL != tcp_proc->proc_endpoints) { free(tcp_proc->proc_endpoints); @@ -97,7 +100,7 @@ void mca_btl_tcp_proc_destruct(mca_btl_tcp_proc_t* tcp_proc) * datastructure. */ -mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(const opal_proc_t* proc) +mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(opal_proc_t* proc) { uint64_t hash = proc->proc_name; mca_btl_tcp_proc_t* btl_proc; @@ -116,6 +119,7 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(const opal_proc_t* proc) if(NULL == btl_proc) return NULL; btl_proc->proc_opal = proc; + OBJ_RETAIN(btl_proc->proc_opal); /* add to hash table of all proc instance */ opal_hash_table_set_value_uint64(&mca_btl_tcp_component.tcp_procs, @@ -683,7 +687,7 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, * Remove an endpoint from the proc array and indicate the address is * no longer in use. */ - + int mca_btl_tcp_proc_remove(mca_btl_tcp_proc_t* btl_proc, mca_btl_base_endpoint_t* btl_endpoint) { size_t i; diff --git a/opal/mca/btl/tcp/btl_tcp_proc.h b/opal/mca/btl/tcp/btl_tcp_proc.h index 9e5c13b4c7..14548386dc 100644 --- a/opal/mca/btl/tcp/btl_tcp_proc.h +++ b/opal/mca/btl/tcp/btl_tcp_proc.h @@ -38,7 +38,7 @@ struct mca_btl_tcp_proc_t { opal_list_item_t super; /**< allow proc to be placed on a list */ - const opal_proc_t *proc_opal; + opal_proc_t *proc_opal; /**< pointer to corresponding opal_proc_t */ struct mca_btl_tcp_addr_t* proc_addrs; @@ -104,7 +104,7 @@ enum mca_btl_tcp_connection_quality { }; -mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(const opal_proc_t* proc); +mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(opal_proc_t* proc); mca_btl_tcp_proc_t* mca_btl_tcp_proc_lookup(const opal_process_name_t* name); int mca_btl_tcp_proc_insert(mca_btl_tcp_proc_t*, mca_btl_base_endpoint_t*); int mca_btl_tcp_proc_remove(mca_btl_tcp_proc_t*, mca_btl_base_endpoint_t*); From ce92c7c30909642d227382a41c4ae3912edf635a Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 6 Nov 2014 08:40:44 -0800 Subject: [PATCH 3/3] Copy 1.8.4 release NEWS entries --- NEWS | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/NEWS b/NEWS index b187c597cd..974660f970 100644 --- a/NEWS +++ b/NEWS @@ -61,6 +61,24 @@ Trunk (not on release branches yet) via --enable-mpi-cxx. +1.8.4 +----- +- Removed inadvertent change that set --enable-mpi-thread-multiple "on" + by default, thus impacting performance for non-threaded apps +- Significantly reduced startup time by optimizing internal hash table + implementation +- Fixed segfault in neighborhood collectives under certain use-cases +- Fixed Solaris support +- Fixed PMI configure tests for certain Slurm installation patterns +- Fixed param registration issue in Java bindings +- Several man page fixes +- Silence several warnings and close some memory leaks +- Add new PML to improve MXM performance +- Re-enabled the use of CMA and knem in the shared memory BTL +- Updated mpirun manpage to correctly explain new map/rank/binding options +- Numerous updates and performance improvements to OSHMEM + + 1.8.3 -----