1
1

First attempt to make the registry and name server subsystems thread-safe. Comment out the duplicate calls that register processes in mpi_init and mpirun2.

This commit was SVN r2697.
Этот коммит содержится в:
Ralph Castain 2004-09-16 04:14:35 +00:00
родитель 3aa0b648e2
Коммит d0e308fbc4
12 изменённых файлов: 322 добавлений и 101 удалений

Просмотреть файл

@ -9,6 +9,8 @@
#include <string.h> #include <string.h>
#include "threads/mutex.h"
#include "util/output.h" #include "util/output.h"
#include "util/proc_info.h" #include "util/proc_info.h"
@ -408,6 +410,8 @@ int gpr_proxy_subscribe(ompi_registry_mode_t mode,
} }
} }
OMPI_THREAD_LOCK(&mca_gpr_proxy_mutex);
/* store callback function and user_tag in local list for lookup */ /* store callback function and user_tag in local list for lookup */
/* generate id_tag to send to replica to identify lookup entry */ /* generate id_tag to send to replica to identify lookup entry */
trackptr = OBJ_NEW(mca_gpr_notify_request_tracker_t); trackptr = OBJ_NEW(mca_gpr_notify_request_tracker_t);
@ -450,6 +454,7 @@ int gpr_proxy_subscribe(ompi_registry_mode_t mode,
ompi_list_append(&mca_gpr_proxy_notify_request_tracker, &trackptr->item); ompi_list_append(&mca_gpr_proxy_notify_request_tracker, &trackptr->item);
ompi_buffer_free(answer); ompi_buffer_free(answer);
ompi_buffer_free(cmd); ompi_buffer_free(cmd);
OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex);
return OMPI_SUCCESS; return OMPI_SUCCESS;
CLEANUP: CLEANUP:
@ -458,6 +463,7 @@ int gpr_proxy_subscribe(ompi_registry_mode_t mode,
OBJ_RELEASE(trackptr); OBJ_RELEASE(trackptr);
} }
ompi_buffer_free(cmd); ompi_buffer_free(cmd);
OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex);
return OMPI_ERROR; return OMPI_ERROR;
} }
@ -552,6 +558,7 @@ int gpr_proxy_unsubscribe(ompi_registry_mode_t mode,
goto CLEANUP; goto CLEANUP;
} }
OMPI_THREAD_LOCK(&mca_gpr_proxy_mutex);
/* locate corresponding entry on proxy tracker list and remove it */ /* locate corresponding entry on proxy tracker list and remove it */
for (trackptr = (mca_gpr_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_proxy_notify_request_tracker); for (trackptr = (mca_gpr_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_proxy_notify_request_tracker);
trackptr != (mca_gpr_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_proxy_notify_request_tracker) && trackptr != (mca_gpr_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_proxy_notify_request_tracker) &&
@ -570,10 +577,12 @@ int gpr_proxy_unsubscribe(ompi_registry_mode_t mode,
OBJ_RELEASE(trackptr); OBJ_RELEASE(trackptr);
ompi_buffer_free(answer); ompi_buffer_free(answer);
ompi_buffer_free(cmd); ompi_buffer_free(cmd);
OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex);
return OMPI_SUCCESS; return OMPI_SUCCESS;
CLEANUP: CLEANUP:
ompi_buffer_free(cmd); ompi_buffer_free(cmd);
OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex);
return OMPI_ERROR; return OMPI_ERROR;
} }
@ -655,6 +664,7 @@ int gpr_proxy_synchro(ompi_registry_synchro_mode_t synchro_mode,
goto CLEANUP; goto CLEANUP;
} }
OMPI_THREAD_LOCK(&mca_gpr_proxy_mutex);
/* store callback function and user_tag in local list for lookup */ /* store callback function and user_tag in local list for lookup */
/* generate id_tag to send to replica to identify lookup entry */ /* generate id_tag to send to replica to identify lookup entry */
trackptr = OBJ_NEW(mca_gpr_notify_request_tracker_t); trackptr = OBJ_NEW(mca_gpr_notify_request_tracker_t);
@ -697,6 +707,7 @@ int gpr_proxy_synchro(ompi_registry_synchro_mode_t synchro_mode,
ompi_list_append(&mca_gpr_proxy_notify_request_tracker, &trackptr->item); ompi_list_append(&mca_gpr_proxy_notify_request_tracker, &trackptr->item);
ompi_buffer_free(answer); ompi_buffer_free(answer);
ompi_buffer_free(cmd); ompi_buffer_free(cmd);
OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex);
return OMPI_SUCCESS; return OMPI_SUCCESS;
CLEANUP: CLEANUP:
@ -705,6 +716,7 @@ int gpr_proxy_synchro(ompi_registry_synchro_mode_t synchro_mode,
OBJ_RELEASE(trackptr); OBJ_RELEASE(trackptr);
} }
ompi_buffer_free(cmd); ompi_buffer_free(cmd);
OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex);
return OMPI_ERROR; return OMPI_ERROR;
} }
@ -805,6 +817,7 @@ int gpr_proxy_cancel_synchro(ompi_registry_synchro_mode_t synchro_mode,
goto CLEANUP; goto CLEANUP;
} }
OMPI_THREAD_LOCK(&mca_gpr_proxy_mutex);
/* locate corresponding entry on proxy tracker list and remove it */ /* locate corresponding entry on proxy tracker list and remove it */
for (trackptr = (mca_gpr_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_proxy_notify_request_tracker); for (trackptr = (mca_gpr_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_proxy_notify_request_tracker);
trackptr != (mca_gpr_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_proxy_notify_request_tracker) && trackptr != (mca_gpr_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_proxy_notify_request_tracker) &&
@ -823,10 +836,12 @@ int gpr_proxy_cancel_synchro(ompi_registry_synchro_mode_t synchro_mode,
OBJ_RELEASE(trackptr); OBJ_RELEASE(trackptr);
ompi_buffer_free(answer); ompi_buffer_free(answer);
ompi_buffer_free(cmd); ompi_buffer_free(cmd);
OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex);
return OMPI_SUCCESS; return OMPI_SUCCESS;
CLEANUP: CLEANUP:
ompi_buffer_free(cmd); ompi_buffer_free(cmd);
OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex);
return OMPI_ERROR; return OMPI_ERROR;
} }

Просмотреть файл

@ -10,6 +10,9 @@
#include "ompi_config.h" #include "ompi_config.h"
#include "include/types.h" #include "include/types.h"
#include "include/constants.h" #include "include/constants.h"
#include "threads/mutex.h"
#include "class/ompi_list.h" #include "class/ompi_list.h"
#include "mca/gpr/base/base.h" #include "mca/gpr/base/base.h"
@ -40,6 +43,7 @@ extern ompi_list_t mca_gpr_proxy_notify_request_tracker;
extern mca_gpr_notify_id_t mca_gpr_proxy_last_notify_id_tag; extern mca_gpr_notify_id_t mca_gpr_proxy_last_notify_id_tag;
extern ompi_list_t mca_gpr_proxy_free_notify_id_tags; extern ompi_list_t mca_gpr_proxy_free_notify_id_tags;
extern int mca_gpr_proxy_debug; extern int mca_gpr_proxy_debug;
extern ompi_mutex_t mca_gpr_proxy_mutex;
/* /*
* Implementation of delete_segment(). * Implementation of delete_segment().

Просмотреть файл

@ -14,6 +14,9 @@
#include "ompi_config.h" #include "ompi_config.h"
#include "include/constants.h" #include "include/constants.h"
#include "threads/mutex.h"
#include "util/proc_info.h" #include "util/proc_info.h"
#include "util/output.h" #include "util/output.h"
#include "mca/mca.h" #include "mca/mca.h"
@ -74,6 +77,7 @@ ompi_list_t mca_gpr_proxy_notify_request_tracker;
mca_gpr_notify_id_t mca_gpr_proxy_last_notify_id_tag; mca_gpr_notify_id_t mca_gpr_proxy_last_notify_id_tag;
ompi_list_t mca_gpr_proxy_free_notify_id_tags; ompi_list_t mca_gpr_proxy_free_notify_id_tags;
int mca_gpr_proxy_debug; int mca_gpr_proxy_debug;
ompi_mutex_t mca_gpr_proxy_mutex;
/* /*
@ -126,6 +130,9 @@ mca_gpr_base_module_t* mca_gpr_proxy_init(bool *allow_multi_user_threads, bool *
*allow_multi_user_threads = true; *allow_multi_user_threads = true;
*have_hidden_threads = false; *have_hidden_threads = false;
/* setup thread lock */
OBJ_CONSTRUCT(&mca_gpr_proxy_mutex, ompi_mutex_t);
/* define the replica for us to use - get it from process_info */ /* define the replica for us to use - get it from process_info */
mca_gpr_my_replica = ompi_name_server.copy_process_name(ompi_process_info.gpr_replica); mca_gpr_my_replica = ompi_name_server.copy_process_name(ompi_process_info.gpr_replica);
if (NULL == mca_gpr_my_replica) { /* can't function */ if (NULL == mca_gpr_my_replica) { /* can't function */
@ -246,6 +253,8 @@ void mca_gpr_proxy_notify_recv(int status, ompi_process_name_t* sender,
message->tokens = NULL; message->tokens = NULL;
} }
OMPI_THREAD_LOCK(&mca_gpr_proxy_mutex);
/* find the request corresponding to this notify */ /* find the request corresponding to this notify */
found = false; found = false;
for (trackptr = (mca_gpr_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_proxy_notify_request_tracker); for (trackptr = (mca_gpr_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_proxy_notify_request_tracker);
@ -259,6 +268,7 @@ void mca_gpr_proxy_notify_recv(int status, ompi_process_name_t* sender,
if (!found) { /* didn't find request */ if (!found) { /* didn't find request */
ompi_output(0, "Proxy notification error - received request not found"); ompi_output(0, "Proxy notification error - received request not found");
OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex);
return; return;
} }
@ -270,6 +280,8 @@ void mca_gpr_proxy_notify_recv(int status, ompi_process_name_t* sender,
RETURN_ERROR: RETURN_ERROR:
OBJ_RELEASE(message); OBJ_RELEASE(message);
OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex);
/* reissue non-blocking receive */ /* reissue non-blocking receive */
mca_oob_recv_packed_nb(MCA_OOB_NAME_ANY, MCA_OOB_TAG_GPR_NOTIFY, 0, mca_gpr_proxy_notify_recv, NULL); mca_oob_recv_packed_nb(MCA_OOB_NAME_ANY, MCA_OOB_TAG_GPR_NOTIFY, 0, mca_gpr_proxy_notify_recv, NULL);

Просмотреть файл

@ -20,6 +20,9 @@
#include <libgen.h> #include <libgen.h>
#include "include/constants.h" #include "include/constants.h"
#include "threads/mutex.h"
#include "util/output.h" #include "util/output.h"
#include "util/proc_info.h" #include "util/proc_info.h"
#include "mca/gpr/base/base.h" #include "mca/gpr/base/base.h"
@ -31,6 +34,8 @@ int gpr_replica_delete_segment(char *segment)
{ {
mca_gpr_replica_segment_t *seg; mca_gpr_replica_segment_t *seg;
OMPI_THREAD_LOCK(&mca_gpr_replica_mutex);
if (mca_gpr_replica_debug) { if (mca_gpr_replica_debug) {
ompi_output(0, "[%d,%d,%d] gpr replica: delete_segment entered", ompi_process_info.name->cellid, ompi_output(0, "[%d,%d,%d] gpr replica: delete_segment entered", ompi_process_info.name->cellid,
ompi_process_info.name->jobid, ompi_process_info.name->vpid); ompi_process_info.name->jobid, ompi_process_info.name->vpid);
@ -45,9 +50,11 @@ int gpr_replica_delete_segment(char *segment)
OBJ_RELEASE(seg); OBJ_RELEASE(seg);
if (OMPI_SUCCESS != gpr_replica_delete_key(segment, NULL)) { /* couldn't remove dictionary entry */ if (OMPI_SUCCESS != gpr_replica_delete_key(segment, NULL)) { /* couldn't remove dictionary entry */
OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
return OMPI_ERROR; return OMPI_ERROR;
} }
OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -87,6 +94,8 @@ int gpr_replica_put(ompi_registry_mode_t addr_mode, char *segment,
*/ */
put_mode = addr_mode & OMPI_REGISTRY_OVERWRITE; put_mode = addr_mode & OMPI_REGISTRY_OVERWRITE;
OMPI_THREAD_LOCK(&mca_gpr_replica_mutex);
/* find the segment */ /* find the segment */
seg = gpr_replica_find_seg(true, segment); seg = gpr_replica_find_seg(true, segment);
if (NULL == seg) { /* couldn't find segment or create it */ if (NULL == seg) { /* couldn't find segment or create it */
@ -189,6 +198,8 @@ int gpr_replica_put(ompi_registry_mode_t addr_mode, char *segment,
ompi_process_info.name->jobid, ompi_process_info.name->vpid); ompi_process_info.name->jobid, ompi_process_info.name->vpid);
} }
OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
return return_code; return return_code;
} }
@ -218,9 +229,12 @@ int gpr_replica_delete_object(ompi_registry_mode_t addr_mode,
return OMPI_ERROR; return OMPI_ERROR;
} }
OMPI_THREAD_LOCK(&mca_gpr_replica_mutex);
/* find the specified segment */ /* find the specified segment */
seg = gpr_replica_find_seg(false, segment); seg = gpr_replica_find_seg(false, segment);
if (NULL == seg) { /* segment not found */ if (NULL == seg) { /* segment not found */
OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
return OMPI_ERROR; return OMPI_ERROR;
} }
@ -301,6 +315,9 @@ int gpr_replica_delete_object(ompi_registry_mode_t addr_mode,
if (NULL != keys) { if (NULL != keys) {
free(keys); free(keys);
} }
OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
return return_code; return return_code;
} }
@ -316,6 +333,8 @@ ompi_list_t* gpr_replica_index(char *segment)
ompi_process_info.name->jobid, ompi_process_info.name->vpid, segment); ompi_process_info.name->jobid, ompi_process_info.name->vpid, segment);
} }
OMPI_THREAD_LOCK(&mca_gpr_replica_mutex);
answer = OBJ_NEW(ompi_list_t); answer = OBJ_NEW(ompi_list_t);
if (NULL == segment) { /* looking for index of global registry */ if (NULL == segment) { /* looking for index of global registry */
@ -330,6 +349,7 @@ ompi_list_t* gpr_replica_index(char *segment)
/* find the specified segment */ /* find the specified segment */
seg = gpr_replica_find_seg(false, segment); seg = gpr_replica_find_seg(false, segment);
if (NULL == seg) { /* segment not found */ if (NULL == seg) { /* segment not found */
OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
return answer; return answer;
} }
/* got segment - now index that dictionary */ /* got segment - now index that dictionary */
@ -343,6 +363,7 @@ ompi_list_t* gpr_replica_index(char *segment)
} }
OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
return answer; return answer;
} }
@ -367,6 +388,8 @@ int gpr_replica_subscribe(ompi_registry_mode_t addr_mode,
return OMPI_ERROR; return OMPI_ERROR;
} }
OMPI_THREAD_LOCK(&mca_gpr_replica_mutex);
/* enter request on notify tracking system */ /* enter request on notify tracking system */
trackptr = OBJ_NEW(mca_gpr_notify_request_tracker_t); trackptr = OBJ_NEW(mca_gpr_notify_request_tracker_t);
trackptr->requestor = NULL; trackptr->requestor = NULL;
@ -393,9 +416,11 @@ int gpr_replica_subscribe(ompi_registry_mode_t addr_mode,
notify_msg->trig_synchro = OMPI_REGISTRY_SYNCHRO_MODE_NONE; notify_msg->trig_synchro = OMPI_REGISTRY_SYNCHRO_MODE_NONE;
gpr_replica_process_triggers(segment, trig, notify_msg); gpr_replica_process_triggers(segment, trig, notify_msg);
} }
OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
return OMPI_SUCCESS; return OMPI_SUCCESS;
} else { } else {
OBJ_RELEASE(trackptr); OBJ_RELEASE(trackptr);
OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
return OMPI_ERROR; return OMPI_ERROR;
} }
} }
@ -418,9 +443,12 @@ int gpr_replica_unsubscribe(ompi_registry_mode_t addr_mode,
return OMPI_ERROR; return OMPI_ERROR;
} }
OMPI_THREAD_LOCK(&mca_gpr_replica_mutex);
/* find trigger on replica - return id_tag */ /* find trigger on replica - return id_tag */
if (MCA_GPR_NOTIFY_ID_MAX == (id_tag = gpr_replica_remove_trigger(OMPI_REGISTRY_SYNCHRO_MODE_NONE, action, if (MCA_GPR_NOTIFY_ID_MAX == (id_tag = gpr_replica_remove_trigger(OMPI_REGISTRY_SYNCHRO_MODE_NONE, action,
addr_mode, segment, tokens, 0))) { addr_mode, segment, tokens, 0))) {
OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
return OMPI_ERROR; return OMPI_ERROR;
} }
@ -435,10 +463,12 @@ int gpr_replica_unsubscribe(ompi_registry_mode_t addr_mode,
ompi_list_remove_item(&mca_gpr_replica_notify_request_tracker, &trackptr->item); ompi_list_remove_item(&mca_gpr_replica_notify_request_tracker, &trackptr->item);
OBJ_RELEASE(trackptr); OBJ_RELEASE(trackptr);
OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
/* if we get here, then couldn't find request */ /* if we get here, then couldn't find request */
OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
return OMPI_ERROR; return OMPI_ERROR;
} }
@ -464,6 +494,7 @@ int gpr_replica_synchro(ompi_registry_synchro_mode_t synchro_mode,
return OMPI_ERROR; return OMPI_ERROR;
} }
OMPI_THREAD_LOCK(&mca_gpr_replica_mutex);
/* enter request on notify tracking system */ /* enter request on notify tracking system */
trackptr = OBJ_NEW(mca_gpr_notify_request_tracker_t); trackptr = OBJ_NEW(mca_gpr_notify_request_tracker_t);
trackptr->requestor = NULL; trackptr->requestor = NULL;
@ -493,8 +524,10 @@ int gpr_replica_synchro(ompi_registry_synchro_mode_t synchro_mode,
notify_msg->trig_synchro = trig->synch_mode; notify_msg->trig_synchro = trig->synch_mode;
gpr_replica_process_triggers(segment, trig, notify_msg); gpr_replica_process_triggers(segment, trig, notify_msg);
} }
OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
return OMPI_SUCCESS; return OMPI_SUCCESS;
} else { } else {
OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
OBJ_RELEASE(trackptr); OBJ_RELEASE(trackptr);
return OMPI_ERROR; return OMPI_ERROR;
} }
@ -518,9 +551,11 @@ int gpr_replica_cancel_synchro(ompi_registry_synchro_mode_t synchro_mode,
return OMPI_ERROR; return OMPI_ERROR;
} }
OMPI_THREAD_LOCK(&mca_gpr_replica_mutex);
/* find trigger on replica - return id_tag */ /* find trigger on replica - return id_tag */
if (MCA_GPR_NOTIFY_ID_MAX == (id_tag = gpr_replica_remove_trigger(synchro_mode, OMPI_REGISTRY_NOTIFY_NONE, if (MCA_GPR_NOTIFY_ID_MAX == (id_tag = gpr_replica_remove_trigger(synchro_mode, OMPI_REGISTRY_NOTIFY_NONE,
addr_mode, segment, tokens, trigger))) { addr_mode, segment, tokens, trigger))) {
OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
return OMPI_ERROR; return OMPI_ERROR;
} }
@ -535,10 +570,12 @@ int gpr_replica_cancel_synchro(ompi_registry_synchro_mode_t synchro_mode,
ompi_list_remove_item(&mca_gpr_replica_notify_request_tracker, &trackptr->item); ompi_list_remove_item(&mca_gpr_replica_notify_request_tracker, &trackptr->item);
OBJ_RELEASE(trackptr); OBJ_RELEASE(trackptr);
OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
/* if we get here, then couldn't find request */ /* if we get here, then couldn't find request */
OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
return OMPI_ERROR; return OMPI_ERROR;
} }
@ -566,9 +603,11 @@ ompi_list_t* gpr_replica_get(ompi_registry_mode_t addr_mode,
return answer; return answer;
} }
OMPI_THREAD_LOCK(&mca_gpr_replica_mutex);
/* find the specified segment */ /* find the specified segment */
seg = gpr_replica_find_seg(false, segment); seg = gpr_replica_find_seg(false, segment);
if (NULL == seg) { /* segment not found */ if (NULL == seg) { /* segment not found */
OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
return answer; return answer;
} }
if (mca_gpr_replica_debug) { if (mca_gpr_replica_debug) {
@ -584,6 +623,7 @@ ompi_list_t* gpr_replica_get(ompi_registry_mode_t addr_mode,
/* convert tokens to list of keys */ /* convert tokens to list of keys */
keylist = gpr_replica_get_key_list(segment, tokens); keylist = gpr_replica_get_key_list(segment, tokens);
if (0 == (num_tokens = ompi_list_get_size(keylist))) { if (0 == (num_tokens = ompi_list_get_size(keylist))) {
OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
return answer; return answer;
} }
@ -640,6 +680,7 @@ ompi_list_t* gpr_replica_get(ompi_registry_mode_t addr_mode,
ompi_process_info.name->jobid, ompi_process_info.name->vpid); ompi_process_info.name->jobid, ompi_process_info.name->vpid);
} }
OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
return answer; return answer;
} }

Просмотреть файл

@ -180,6 +180,7 @@ extern ompi_list_t mca_gpr_replica_notify_request_tracker;
extern mca_gpr_notify_id_t mca_gpr_replica_last_notify_id_tag; extern mca_gpr_notify_id_t mca_gpr_replica_last_notify_id_tag;
extern ompi_list_t mca_gpr_replica_free_notify_id_tags; extern ompi_list_t mca_gpr_replica_free_notify_id_tags;
extern int mca_gpr_replica_debug; extern int mca_gpr_replica_debug;
extern ompi_mutex_t mca_gpr_replica_mutex, mca_gpr_replica_internals_mutex;
/* /*
* Module open / close * Module open / close

Просмотреть файл

@ -16,6 +16,9 @@
#include <time.h> #include <time.h>
#include "include/constants.h" #include "include/constants.h"
#include "threads/mutex.h"
#include "util/proc_info.h" #include "util/proc_info.h"
#include "util/output.h" #include "util/output.h"
#include "util/pack.h" #include "util/pack.h"
@ -79,6 +82,8 @@ ompi_list_t mca_gpr_replica_notify_request_tracker;
mca_gpr_notify_id_t mca_gpr_replica_last_notify_id_tag; mca_gpr_notify_id_t mca_gpr_replica_last_notify_id_tag;
ompi_list_t mca_gpr_replica_free_notify_id_tags; ompi_list_t mca_gpr_replica_free_notify_id_tags;
int mca_gpr_replica_debug; int mca_gpr_replica_debug;
ompi_mutex_t mca_gpr_replica_component_mutex;
ompi_mutex_t mca_gpr_replica_mutex, mca_gpr_replica_internals_mutex;
/* constructor - used to initialize state of keytable instance */ /* constructor - used to initialize state of keytable instance */
@ -324,6 +329,11 @@ mca_gpr_base_module_t *mca_gpr_replica_init(bool *allow_multi_user_threads, bool
*allow_multi_user_threads = true; *allow_multi_user_threads = true;
*have_hidden_threads = false; *have_hidden_threads = false;
/* setup the thread locks */
OBJ_CONSTRUCT(&mca_gpr_replica_component_mutex, ompi_mutex_t);
OBJ_CONSTRUCT(&mca_gpr_replica_internals_mutex, ompi_mutex_t);
OBJ_CONSTRUCT(&mca_gpr_replica_mutex, ompi_mutex_t);
/* initialize the registry head */ /* initialize the registry head */
OBJ_CONSTRUCT(&mca_gpr_replica_head.registry, ompi_list_t); OBJ_CONSTRUCT(&mca_gpr_replica_head.registry, ompi_list_t);
@ -377,44 +387,44 @@ int mca_gpr_replica_finalize(void)
ompi_output(0, "finalizing gpr replica"); ompi_output(0, "finalizing gpr replica");
} }
/* mca_gpr_replica_segment_t *seg; */ /* mca_gpr_replica_segment_t *seg; */
/* mca_gpr_replica_keytable_t *kt; */ /* mca_gpr_replica_keytable_t *kt; */
/* mca_gpr_replica_keylist_t *kl; */ /* mca_gpr_replica_keylist_t *kl; */
/* mca_gpr_notify_request_tracker_t *tk; */ /* mca_gpr_notify_request_tracker_t *tk; */
/* mca_gpr_idtag_list_t *id; */ /* mca_gpr_idtag_list_t *id; */
/* /\* free all storage, but only if this component was initialized *\/ */ /* /\* free all storage, but only if this component was initialized *\/ */
/* if (initialized) { */ /* if (initialized) { */
/* while (NULL != (seg = (mca_gpr_replica_segment_t*)ompi_list_remove_first(&mca_gpr_replica_head.registry))) { */ /* while (NULL != (seg = (mca_gpr_replica_segment_t*)ompi_list_remove_first(&mca_gpr_replica_head.registry))) { */
/* OBJ_RELEASE(seg); */ /* OBJ_RELEASE(seg); */
/* } */ /* } */
/* OBJ_DESTRUCT(&mca_gpr_replica_head.registry); */ /* OBJ_DESTRUCT(&mca_gpr_replica_head.registry); */
/* while (NULL != (kt = (mca_gpr_replica_keytable_t*)ompi_list_remove_first(&mca_gpr_replica_head.segment_dict))) { */ /* while (NULL != (kt = (mca_gpr_replica_keytable_t*)ompi_list_remove_first(&mca_gpr_replica_head.segment_dict))) { */
/* OBJ_RELEASE(kt); */ /* OBJ_RELEASE(kt); */
/* } */ /* } */
/* OBJ_DESTRUCT(&mca_gpr_replica_head.segment_dict); */ /* OBJ_DESTRUCT(&mca_gpr_replica_head.segment_dict); */
/* while (NULL != (kl = (mca_gpr_replica_keylist_t*)ompi_list_remove_first(&mca_gpr_replica_head.freekeys))) { */ /* while (NULL != (kl = (mca_gpr_replica_keylist_t*)ompi_list_remove_first(&mca_gpr_replica_head.freekeys))) { */
/* OBJ_RELEASE(kl); */ /* OBJ_RELEASE(kl); */
/* } */ /* } */
/* OBJ_DESTRUCT(&mca_gpr_replica_head.freekeys); */ /* OBJ_DESTRUCT(&mca_gpr_replica_head.freekeys); */
/* while (NULL != (tk = (mca_gpr_notify_request_tracker_t*)ompi_list_remove_first(&mca_gpr_replica_notify_request_tracker))) { */ /* while (NULL != (tk = (mca_gpr_notify_request_tracker_t*)ompi_list_remove_first(&mca_gpr_replica_notify_request_tracker))) { */
/* OBJ_RELEASE(tk); */ /* OBJ_RELEASE(tk); */
/* } */ /* } */
/* OBJ_DESTRUCT(&mca_gpr_replica_notify_request_tracker); */ /* OBJ_DESTRUCT(&mca_gpr_replica_notify_request_tracker); */
/* while (NULL != (id = (mca_gpr_idtag_list_t*)ompi_list_remove_first(&mca_gpr_replica_free_notify_id_tags))) { */ /* while (NULL != (id = (mca_gpr_idtag_list_t*)ompi_list_remove_first(&mca_gpr_replica_free_notify_id_tags))) { */
/* OBJ_RELEASE(id); */ /* OBJ_RELEASE(id); */
/* } */ /* } */
/* OBJ_DESTRUCT(&mca_gpr_replica_free_notify_id_tags); */ /* OBJ_DESTRUCT(&mca_gpr_replica_free_notify_id_tags); */
/* initialized = false; */ /* initialized = false; */
/* } */ /* } */
/* All done */ /* All done */
@ -733,6 +743,9 @@ void mca_gpr_replica_recv(int status, ompi_process_name_t* sender,
goto RETURN_ERROR; goto RETURN_ERROR;
} }
/******* LOCK *****/
OMPI_THREAD_LOCK(&mca_gpr_replica_component_mutex);
/* enter request on notify tracking system */ /* enter request on notify tracking system */
trackptr = OBJ_NEW(mca_gpr_notify_request_tracker_t); trackptr = OBJ_NEW(mca_gpr_notify_request_tracker_t);
trackptr->requestor = ompi_name_server.copy_process_name(sender); trackptr->requestor = ompi_name_server.copy_process_name(sender);
@ -748,6 +761,9 @@ void mca_gpr_replica_recv(int status, ompi_process_name_t* sender,
} }
ompi_list_append(&mca_gpr_replica_notify_request_tracker, &trackptr->item); ompi_list_append(&mca_gpr_replica_notify_request_tracker, &trackptr->item);
OMPI_THREAD_UNLOCK(&mca_gpr_replica_component_mutex);
/****** UNLOCK ******/
response = (int32_t)gpr_replica_construct_trigger(OMPI_REGISTRY_SYNCHRO_MODE_NONE, action, response = (int32_t)gpr_replica_construct_trigger(OMPI_REGISTRY_SYNCHRO_MODE_NONE, action,
mode, segment, tokens, mode, segment, tokens,
0, trackptr->id_tag); 0, trackptr->id_tag);
@ -806,6 +822,11 @@ void mca_gpr_replica_recv(int status, ompi_process_name_t* sender,
segment, tokens, 0); segment, tokens, 0);
if (MCA_GPR_NOTIFY_ID_MAX != id_tag) { /* removed trigger successfully */ if (MCA_GPR_NOTIFY_ID_MAX != id_tag) { /* removed trigger successfully */
/******* LOCK *****/
OMPI_THREAD_LOCK(&mca_gpr_replica_component_mutex);
/* find request on replica notify tracking system */ /* find request on replica notify tracking system */
for (trackptr = (mca_gpr_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_replica_notify_request_tracker); for (trackptr = (mca_gpr_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_replica_notify_request_tracker);
trackptr != (mca_gpr_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_replica_notify_request_tracker) && trackptr != (mca_gpr_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_replica_notify_request_tracker) &&
@ -828,6 +849,10 @@ void mca_gpr_replica_recv(int status, ompi_process_name_t* sender,
ompi_list_append(&mca_gpr_replica_free_notify_id_tags, &ptr_free_id->item); ompi_list_append(&mca_gpr_replica_free_notify_id_tags, &ptr_free_id->item);
/* release tracker item */ /* release tracker item */
OBJ_RELEASE(trackptr); OBJ_RELEASE(trackptr);
OMPI_THREAD_UNLOCK(&mca_gpr_replica_component_mutex);
/****** UNLOCK ******/
} }
} else { } else {
response = (int32_t)MCA_GPR_NOTIFY_ID_MAX; response = (int32_t)MCA_GPR_NOTIFY_ID_MAX;
@ -895,8 +920,20 @@ void mca_gpr_replica_recv(int status, ompi_process_name_t* sender,
goto RETURN_ERROR; goto RETURN_ERROR;
} }
/******* LOCK *****/
OMPI_THREAD_LOCK(&mca_gpr_replica_component_mutex);
/* enter request on notify tracking system */ /* enter request on notify tracking system */
trackptr = OBJ_NEW(mca_gpr_notify_request_tracker_t); trackptr = OBJ_NEW(mca_gpr_notify_request_tracker_t);
if (mca_gpr_replica_debug) {
if (NULL != sender) {
ompi_output(0, "gpr_replica_recv: received synchro req from [%d,%d,%d]", sender->cellid,
sender->jobid, sender->vpid);
} else {
ompi_output(0, "gpr_replica_recv: received synchro req from NULL");
}
}
trackptr->requestor = ompi_name_server.copy_process_name(sender); trackptr->requestor = ompi_name_server.copy_process_name(sender);
trackptr->req_tag = id_tag; trackptr->req_tag = id_tag;
trackptr->callback = NULL; trackptr->callback = NULL;
@ -910,6 +947,9 @@ void mca_gpr_replica_recv(int status, ompi_process_name_t* sender,
} }
ompi_list_append(&mca_gpr_replica_notify_request_tracker, &trackptr->item); ompi_list_append(&mca_gpr_replica_notify_request_tracker, &trackptr->item);
OMPI_THREAD_UNLOCK(&mca_gpr_replica_component_mutex);
/****** UNLOCK ******/
if(NULL != gpr_replica_construct_trigger(synchro_mode, OMPI_REGISTRY_NOTIFY_NONE, if(NULL != gpr_replica_construct_trigger(synchro_mode, OMPI_REGISTRY_NOTIFY_NONE,
mode, segment, tokens, mode, segment, tokens,
trigger, trackptr->id_tag)) { trigger, trackptr->id_tag)) {
@ -979,6 +1019,10 @@ void mca_gpr_replica_recv(int status, ompi_process_name_t* sender,
segment, tokens, trigger); segment, tokens, trigger);
if (MCA_GPR_NOTIFY_ID_MAX != id_tag) { /* removed trigger successfully */ if (MCA_GPR_NOTIFY_ID_MAX != id_tag) { /* removed trigger successfully */
/******* LOCK *****/
OMPI_THREAD_LOCK(&mca_gpr_replica_component_mutex);
/* find request on replica notify tracking system */ /* find request on replica notify tracking system */
for (trackptr = (mca_gpr_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_replica_notify_request_tracker); for (trackptr = (mca_gpr_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_replica_notify_request_tracker);
trackptr != (mca_gpr_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_replica_notify_request_tracker) && trackptr != (mca_gpr_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_replica_notify_request_tracker) &&
@ -1001,6 +1045,10 @@ void mca_gpr_replica_recv(int status, ompi_process_name_t* sender,
ompi_list_append(&mca_gpr_replica_free_notify_id_tags, &ptr_free_id->item); ompi_list_append(&mca_gpr_replica_free_notify_id_tags, &ptr_free_id->item);
/* release tracker item */ /* release tracker item */
OBJ_RELEASE(trackptr); OBJ_RELEASE(trackptr);
OMPI_THREAD_UNLOCK(&mca_gpr_replica_component_mutex);
/****** UNLOCK ******/
} }
} else { } else {
response = (int32_t)MCA_GPR_NOTIFY_ID_MAX; response = (int32_t)MCA_GPR_NOTIFY_ID_MAX;
@ -1022,6 +1070,10 @@ void mca_gpr_replica_recv(int status, ompi_process_name_t* sender,
/***** TEST INTERNALS *****/ /***** TEST INTERNALS *****/
} else if (MCA_GPR_TEST_INTERNALS_CMD == command) { } else if (MCA_GPR_TEST_INTERNALS_CMD == command) {
/******* LOCK *****/
OMPI_THREAD_LOCK(&mca_gpr_replica_component_mutex);
if ((OMPI_SUCCESS != ompi_unpack(buffer, &test_level, 1, OMPI_INT32)) || if ((OMPI_SUCCESS != ompi_unpack(buffer, &test_level, 1, OMPI_INT32)) ||
(0 > test_level)) { (0 > test_level)) {
goto RETURN_ERROR; goto RETURN_ERROR;
@ -1053,8 +1105,11 @@ void mca_gpr_replica_recv(int status, ompi_process_name_t* sender,
if (0 > mca_oob_send_packed(sender, answer, tag, 0)) { if (0 > mca_oob_send_packed(sender, answer, tag, 0)) {
/* RHC -- not sure what to do if the return send fails */ /* RHC -- not sure what to do if the return send fails */
} }
OMPI_THREAD_UNLOCK(&mca_gpr_replica_component_mutex);
/****** UNLOCK ******/
/**** UNRECOGNIZED ****/
/**** UNRECOGNIZED COMMAND ****/
} else { /* got an unrecognized command */ } else { /* got an unrecognized command */
RETURN_ERROR: RETURN_ERROR:
ompi_buffer_init(&error_answer, 8); ompi_buffer_init(&error_answer, 8);

Просмотреть файл

@ -20,6 +20,9 @@
#include <libgen.h> #include <libgen.h>
#include "include/constants.h" #include "include/constants.h"
#include "threads/mutex.h"
#include "util/output.h" #include "util/output.h"
#include "util/printf.h" #include "util/printf.h"
#include "util/proc_info.h" #include "util/proc_info.h"
@ -29,6 +32,7 @@
#include "gpr_replica.h" #include "gpr_replica.h"
#include "gpr_replica_internals.h" #include "gpr_replica_internals.h"
/* /*
* *
*/ */
@ -38,8 +42,11 @@ mca_gpr_replica_segment_t *gpr_replica_define_segment(char *segment)
mca_gpr_replica_segment_t *seg; mca_gpr_replica_segment_t *seg;
mca_gpr_replica_key_t key; mca_gpr_replica_key_t key;
OMPI_THREAD_LOCK(&mca_gpr_replica_internals_mutex);
key = gpr_replica_define_key(segment, NULL); key = gpr_replica_define_key(segment, NULL);
if (MCA_GPR_REPLICA_KEY_MAX == key) { /* got some kind of error code */ if (MCA_GPR_REPLICA_KEY_MAX == key) { /* got some kind of error code */
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return NULL; return NULL;
} }
@ -48,6 +55,8 @@ mca_gpr_replica_segment_t *gpr_replica_define_segment(char *segment)
seg->segment = key; seg->segment = key;
ompi_list_append(&mca_gpr_replica_head.registry, &seg->item); ompi_list_append(&mca_gpr_replica_head.registry, &seg->item);
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return seg; return seg;
} }
@ -57,6 +66,8 @@ mca_gpr_replica_segment_t *gpr_replica_find_seg(bool create, char *segment)
mca_gpr_replica_keytable_t *ptr_seg; mca_gpr_replica_keytable_t *ptr_seg;
mca_gpr_replica_segment_t *seg; mca_gpr_replica_segment_t *seg;
OMPI_THREAD_LOCK(&mca_gpr_replica_internals_mutex);
/* search the registry segments to find which one is being referenced */ /* search the registry segments to find which one is being referenced */
for (ptr_seg = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&mca_gpr_replica_head.segment_dict); for (ptr_seg = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&mca_gpr_replica_head.segment_dict);
ptr_seg != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&mca_gpr_replica_head.segment_dict); ptr_seg != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&mca_gpr_replica_head.segment_dict);
@ -67,11 +78,15 @@ mca_gpr_replica_segment_t *gpr_replica_find_seg(bool create, char *segment)
seg != (mca_gpr_replica_segment_t*)ompi_list_get_end(&mca_gpr_replica_head.registry); seg != (mca_gpr_replica_segment_t*)ompi_list_get_end(&mca_gpr_replica_head.registry);
seg = (mca_gpr_replica_segment_t*)ompi_list_get_next(seg)) { seg = (mca_gpr_replica_segment_t*)ompi_list_get_next(seg)) {
if(seg->segment == ptr_seg->key) { if(seg->segment == ptr_seg->key) {
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return(seg); return(seg);
} }
} }
} }
} }
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
if (create) { if (create) {
/* didn't find the dictionary entry - create it */ /* didn't find the dictionary entry - create it */
return gpr_replica_define_segment(segment); return gpr_replica_define_segment(segment);
@ -85,12 +100,15 @@ mca_gpr_replica_keytable_t *gpr_replica_find_dict_entry(char *segment, char *tok
mca_gpr_replica_keytable_t *ptr_key; mca_gpr_replica_keytable_t *ptr_key;
mca_gpr_replica_segment_t *seg; mca_gpr_replica_segment_t *seg;
OMPI_THREAD_LOCK(&mca_gpr_replica_internals_mutex);
/* search the registry segments to find which one is being referenced */ /* search the registry segments to find which one is being referenced */
for (ptr_seg = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&mca_gpr_replica_head.segment_dict); for (ptr_seg = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&mca_gpr_replica_head.segment_dict);
ptr_seg != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&mca_gpr_replica_head.segment_dict); ptr_seg != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&mca_gpr_replica_head.segment_dict);
ptr_seg = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_seg)) { ptr_seg = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_seg)) {
if (0 == strcmp(segment, ptr_seg->token)) { if (0 == strcmp(segment, ptr_seg->token)) {
if (NULL == token) { /* just want segment token-key pair */ if (NULL == token) { /* just want segment token-key pair */
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return(ptr_seg); return(ptr_seg);
} }
/* search registry to find segment */ /* search registry to find segment */
@ -103,15 +121,19 @@ mca_gpr_replica_keytable_t *gpr_replica_find_dict_entry(char *segment, char *tok
ptr_key != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&seg->keytable); ptr_key != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&seg->keytable);
ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_key)) { ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_key)) {
if (0 == strcmp(token, ptr_key->token)) { if (0 == strcmp(token, ptr_key->token)) {
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return(ptr_key); return(ptr_key);
} }
} }
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return(NULL); /* couldn't find the specified entry */ return(NULL); /* couldn't find the specified entry */
} }
} }
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return(NULL); /* couldn't find segment, even though we found entry in registry dict */ return(NULL); /* couldn't find segment, even though we found entry in registry dict */
} }
} }
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return(NULL); /* couldn't find segment token-key pair */ return(NULL); /* couldn't find segment token-key pair */
} }
@ -152,15 +174,19 @@ char *gpr_replica_get_token(char *segment, mca_gpr_replica_key_t key)
return NULL; return NULL;
} }
OMPI_THREAD_LOCK(&mca_gpr_replica_internals_mutex);
/* find the matching key */ /* find the matching key */
for (ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&seg->keytable); for (ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&seg->keytable);
ptr_key != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&seg->keytable); ptr_key != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&seg->keytable);
ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_key)) { ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_key)) {
if (key == ptr_key->key) { if (key == ptr_key->key) {
answer = strdup(ptr_key->token); answer = strdup(ptr_key->token);
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return answer; return answer;
} }
} }
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return(NULL); /* couldn't find the specified entry */ return(NULL); /* couldn't find the specified entry */
} }
@ -200,6 +226,9 @@ mca_gpr_replica_key_t gpr_replica_define_key(char *segment, char *token)
/* if token is NULL, then this is defining a segment name. Check dictionary to ensure uniqueness */ /* if token is NULL, then this is defining a segment name. Check dictionary to ensure uniqueness */
if (NULL == token) { if (NULL == token) {
OMPI_THREAD_LOCK(&mca_gpr_replica_internals_mutex);
for (ptr_seg = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&mca_gpr_replica_head.segment_dict); for (ptr_seg = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&mca_gpr_replica_head.segment_dict);
ptr_seg != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&mca_gpr_replica_head.segment_dict); ptr_seg != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&mca_gpr_replica_head.segment_dict);
ptr_seg = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_seg)) { ptr_seg = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_seg)) {
@ -216,6 +245,7 @@ mca_gpr_replica_key_t gpr_replica_define_key(char *segment, char *token)
mca_gpr_replica_head.lastkey++; mca_gpr_replica_head.lastkey++;
new->key = mca_gpr_replica_head.lastkey; new->key = mca_gpr_replica_head.lastkey;
} else { /* out of keys */ } else { /* out of keys */
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return MCA_GPR_REPLICA_KEY_MAX; return MCA_GPR_REPLICA_KEY_MAX;
} }
} else { } else {
@ -223,8 +253,10 @@ mca_gpr_replica_key_t gpr_replica_define_key(char *segment, char *token)
new->key = ptr_key->key; new->key = ptr_key->key;
} }
ompi_list_append(&mca_gpr_replica_head.segment_dict, &new->item); ompi_list_append(&mca_gpr_replica_head.segment_dict, &new->item);
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return new->key; return new->key;
} }
OMPI_THREAD_LOCK(&mca_gpr_replica_internals_mutex);
/* okay, token is specified */ /* okay, token is specified */
/* search the registry segments to find which one is being referenced */ /* search the registry segments to find which one is being referenced */
@ -235,6 +267,7 @@ mca_gpr_replica_key_t gpr_replica_define_key(char *segment, char *token)
ptr_key != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&seg->keytable); ptr_key != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&seg->keytable);
ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_key)) { ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_key)) {
if (0 == strcmp(token, ptr_key->token)) { if (0 == strcmp(token, ptr_key->token)) {
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return ptr_key->key; /* already taken, report value */ return ptr_key->key; /* already taken, report value */
} }
} }
@ -249,6 +282,7 @@ mca_gpr_replica_key_t gpr_replica_define_key(char *segment, char *token)
new->key = ptr_key->key; new->key = ptr_key->key;
} }
ompi_list_append(&seg->keytable, &new->item); ompi_list_append(&seg->keytable, &new->item);
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return new->key; return new->key;
} }
/* couldn't find segment */ /* couldn't find segment */
@ -268,6 +302,8 @@ int gpr_replica_delete_key(char *segment, char *token)
return(OMPI_ERROR); return(OMPI_ERROR);
} }
OMPI_THREAD_LOCK(&mca_gpr_replica_internals_mutex);
/* find the segment */ /* find the segment */
seg = gpr_replica_find_seg(false, segment); seg = gpr_replica_find_seg(false, segment);
if (NULL != seg) { if (NULL != seg) {
@ -275,13 +311,16 @@ int gpr_replica_delete_key(char *segment, char *token)
/* if specified token is NULL, then this is deleting a segment name.*/ /* if specified token is NULL, then this is deleting a segment name.*/
if (NULL == token) { if (NULL == token) {
if (OMPI_SUCCESS != gpr_replica_empty_segment(seg)) { /* couldn't empty segment */ if (OMPI_SUCCESS != gpr_replica_empty_segment(seg)) { /* couldn't empty segment */
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return OMPI_ERROR; return OMPI_ERROR;
} }
/* now remove the dictionary entry from the global registry dictionary*/ /* now remove the dictionary entry from the global registry dictionary*/
ptr_seg = gpr_replica_find_dict_entry(segment, NULL); ptr_seg = gpr_replica_find_dict_entry(segment, NULL);
if (NULL == ptr_seg) { /* failed to find dictionary entry */ if (NULL == ptr_seg) { /* failed to find dictionary entry */
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return OMPI_ERROR; return OMPI_ERROR;
} }
/* add key to global registry's freekey list */ /* add key to global registry's freekey list */
new = OBJ_NEW(mca_gpr_replica_keytable_t); new = OBJ_NEW(mca_gpr_replica_keytable_t);
new->token = NULL; new->token = NULL;
@ -290,6 +329,9 @@ int gpr_replica_delete_key(char *segment, char *token)
/* remove the dictionary entry */ /* remove the dictionary entry */
ompi_list_remove_item(&mca_gpr_replica_head.segment_dict, &ptr_seg->item); ompi_list_remove_item(&mca_gpr_replica_head.segment_dict, &ptr_seg->item);
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return(OMPI_SUCCESS); return(OMPI_SUCCESS);
} else { /* token not null, so need to find dictionary element to delete */ } else { /* token not null, so need to find dictionary element to delete */
@ -316,12 +358,15 @@ int gpr_replica_delete_key(char *segment, char *token)
/* now remove the dictionary entry from the segment's dictionary */ /* now remove the dictionary entry from the segment's dictionary */
ompi_list_remove_item(&seg->keytable, &ptr_key->item); ompi_list_remove_item(&seg->keytable, &ptr_key->item);
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return(OMPI_SUCCESS); return(OMPI_SUCCESS);
} }
} }
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return(OMPI_ERROR); /* if we get here, then we couldn't find token in dictionary */ return(OMPI_ERROR); /* if we get here, then we couldn't find token in dictionary */
} }
} }
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return(OMPI_ERROR); /* if we get here, then we couldn't find segment */ return(OMPI_ERROR); /* if we get here, then we couldn't find segment */
} }
@ -333,6 +378,8 @@ int gpr_replica_empty_segment(mca_gpr_replica_segment_t *seg)
/* need to free memory from each entry - remove_last returns pointer to the entry */ /* need to free memory from each entry - remove_last returns pointer to the entry */
OMPI_THREAD_LOCK(&mca_gpr_replica_internals_mutex);
/* empty the segment's registry */ /* empty the segment's registry */
while (!ompi_list_is_empty(&seg->registry_entries)) { while (!ompi_list_is_empty(&seg->registry_entries)) {
ptr = (mca_gpr_replica_core_t*)ompi_list_remove_first(&seg->registry_entries); ptr = (mca_gpr_replica_core_t*)ompi_list_remove_first(&seg->registry_entries);
@ -354,6 +401,8 @@ int gpr_replica_empty_segment(mca_gpr_replica_segment_t *seg)
ompi_list_remove_item(&mca_gpr_replica_head.registry, &seg->item); ompi_list_remove_item(&mca_gpr_replica_head.registry, &seg->item);
OBJ_RELEASE(seg); OBJ_RELEASE(seg);
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -437,7 +486,8 @@ bool gpr_replica_check_key_list(ompi_registry_mode_t addr_mode,
return false; return false;
} }
mca_gpr_replica_trigger_list_t *gpr_replica_construct_trigger(ompi_registry_synchro_mode_t synchro_mode, mca_gpr_replica_trigger_list_t*
gpr_replica_construct_trigger(ompi_registry_synchro_mode_t synchro_mode,
ompi_registry_notify_action_t action, ompi_registry_notify_action_t action,
ompi_registry_mode_t addr_mode, ompi_registry_mode_t addr_mode,
char *segment, char **tokens, int trigger, char *segment, char **tokens, int trigger,
@ -450,8 +500,11 @@ mca_gpr_replica_trigger_list_t *gpr_replica_construct_trigger(ompi_registry_sync
mca_gpr_replica_key_t *keyptr; mca_gpr_replica_key_t *keyptr;
int i, num_tokens; int i, num_tokens;
OMPI_THREAD_LOCK(&mca_gpr_replica_internals_mutex);
seg = gpr_replica_find_seg(true, segment); seg = gpr_replica_find_seg(true, segment);
if (NULL == seg) { /* couldn't find or create segment */ if (NULL == seg) { /* couldn't find or create segment */
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return NULL; return NULL;
} }
@ -519,6 +572,8 @@ mca_gpr_replica_trigger_list_t *gpr_replica_construct_trigger(ompi_registry_sync
ompi_list_append(&seg->triggers, &trig->item); ompi_list_append(&seg->triggers, &trig->item);
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return trig; return trig;
} }
@ -536,8 +591,11 @@ mca_gpr_notify_id_t gpr_replica_remove_trigger(ompi_registry_synchro_mode_t sync
int i=0, num_tokens=0; int i=0, num_tokens=0;
bool found=false, mismatch=false; bool found=false, mismatch=false;
OMPI_THREAD_LOCK(&mca_gpr_replica_internals_mutex);
seg = gpr_replica_find_seg(false, segment); seg = gpr_replica_find_seg(false, segment);
if (NULL == seg) { /* couldn't find segment */ if (NULL == seg) { /* couldn't find segment */
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return MCA_GPR_NOTIFY_ID_MAX; return MCA_GPR_NOTIFY_ID_MAX;
} }
@ -565,6 +623,7 @@ mca_gpr_notify_id_t gpr_replica_remove_trigger(ompi_registry_synchro_mode_t sync
} }
} }
/* search segment's trigger list for specified trigger event */ /* search segment's trigger list for specified trigger event */
for (trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_first(&seg->triggers); for (trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_first(&seg->triggers);
trig != (mca_gpr_replica_trigger_list_t*)ompi_list_get_end(&seg->triggers) && !found; trig != (mca_gpr_replica_trigger_list_t*)ompi_list_get_end(&seg->triggers) && !found;
@ -595,9 +654,12 @@ mca_gpr_notify_id_t gpr_replica_remove_trigger(ompi_registry_synchro_mode_t sync
id_tag = trig->id_tag; id_tag = trig->id_tag;
ompi_list_remove_item(&seg->triggers, &trig->item); ompi_list_remove_item(&seg->triggers, &trig->item);
OBJ_RELEASE(trig); OBJ_RELEASE(trig);
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return id_tag; return id_tag;
} }
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return MCA_GPR_NOTIFY_ID_MAX; return MCA_GPR_NOTIFY_ID_MAX;
} }
@ -684,6 +746,8 @@ void gpr_replica_process_triggers(char *segment,
return; return;
} }
OMPI_THREAD_LOCK(&mca_gpr_replica_internals_mutex);
seg = gpr_replica_find_seg(false, segment); seg = gpr_replica_find_seg(false, segment);
if (NULL == seg) { /* couldn't find segment */ if (NULL == seg) { /* couldn't find segment */
return; return;
@ -694,6 +758,7 @@ void gpr_replica_process_triggers(char *segment,
ompi_process_info.name->jobid, ompi_process_info.name->vpid); ompi_process_info.name->jobid, ompi_process_info.name->vpid);
} }
/* find corresponding notify request */ /* find corresponding notify request */
found = false; found = false;
for (trackptr = (mca_gpr_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_replica_notify_request_tracker); for (trackptr = (mca_gpr_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_replica_notify_request_tracker);
@ -707,6 +772,7 @@ void gpr_replica_process_triggers(char *segment,
if (!found) { /* didn't find request */ if (!found) { /* didn't find request */
ompi_output(0, "Notification error - request not found"); ompi_output(0, "Notification error - request not found");
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return; return;
} }
@ -757,6 +823,8 @@ void gpr_replica_process_triggers(char *segment,
ompi_process_info.name->jobid, ompi_process_info.name->vpid); ompi_process_info.name->jobid, ompi_process_info.name->vpid);
} }
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
} }
ompi_list_t *gpr_replica_test_internals(int level) ompi_list_t *gpr_replica_test_internals(int level)
@ -771,6 +839,8 @@ ompi_list_t *gpr_replica_test_internals(int level)
mca_gpr_replica_keytable_t *dict_entry; mca_gpr_replica_keytable_t *dict_entry;
bool success; bool success;
OMPI_THREAD_LOCK(&mca_gpr_replica_internals_mutex);
test_results = OBJ_NEW(ompi_list_t); test_results = OBJ_NEW(ompi_list_t);
ompi_output(0, "testing define segment"); ompi_output(0, "testing define segment");
@ -963,5 +1033,7 @@ ompi_list_t *gpr_replica_test_internals(int level)
/* check ability to empty segment */ /* check ability to empty segment */
OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex);
return test_results; return test_results;
} }

Просмотреть файл

@ -6,6 +6,8 @@
*/ */
#include <stdio.h> #include <stdio.h>
#include "threads/mutex.h"
#include "ompi_config.h" #include "ompi_config.h"
#include "util/output.h" #include "util/output.h"
#include "mca/mca.h" #include "mca/mca.h"
@ -22,10 +24,14 @@
mca_ns_base_cellid_t ns_replica_create_cellid(void) mca_ns_base_cellid_t ns_replica_create_cellid(void)
{ {
OMPI_THREAD_LOCK(&mca_ns_replica_mutex);
if ((MCA_NS_BASE_CELLID_MAX-2) >= mca_ns_replica_last_used_cellid) { if ((MCA_NS_BASE_CELLID_MAX-2) >= mca_ns_replica_last_used_cellid) {
mca_ns_replica_last_used_cellid = mca_ns_replica_last_used_cellid + 1; mca_ns_replica_last_used_cellid = mca_ns_replica_last_used_cellid + 1;
OMPI_THREAD_UNLOCK(&mca_ns_replica_mutex);
return(mca_ns_replica_last_used_cellid); return(mca_ns_replica_last_used_cellid);
} else { } else {
OMPI_THREAD_UNLOCK(&mca_ns_replica_mutex);
return MCA_NS_BASE_CELLID_MAX; return MCA_NS_BASE_CELLID_MAX;
} }
} }
@ -34,14 +40,18 @@ mca_ns_base_jobid_t ns_replica_create_jobid(void)
{ {
mca_ns_replica_name_tracker_t *new; mca_ns_replica_name_tracker_t *new;
OMPI_THREAD_LOCK(&mca_ns_replica_mutex);
if ((MCA_NS_BASE_JOBID_MAX-2) >= mca_ns_replica_last_used_jobid) { if ((MCA_NS_BASE_JOBID_MAX-2) >= mca_ns_replica_last_used_jobid) {
mca_ns_replica_last_used_jobid = mca_ns_replica_last_used_jobid + 1; mca_ns_replica_last_used_jobid = mca_ns_replica_last_used_jobid + 1;
new = OBJ_NEW(mca_ns_replica_name_tracker_t); new = OBJ_NEW(mca_ns_replica_name_tracker_t);
new->job = mca_ns_replica_last_used_jobid; new->job = mca_ns_replica_last_used_jobid;
new->last_used_vpid = 0; new->last_used_vpid = 0;
ompi_list_append(&mca_ns_replica_name_tracker, &new->item); ompi_list_append(&mca_ns_replica_name_tracker, &new->item);
OMPI_THREAD_UNLOCK(&mca_ns_replica_mutex);
return(mca_ns_replica_last_used_jobid); return(mca_ns_replica_last_used_jobid);
} else { } else {
OMPI_THREAD_UNLOCK(&mca_ns_replica_mutex);
return MCA_NS_BASE_JOBID_MAX; return MCA_NS_BASE_JOBID_MAX;
} }
} }
@ -52,17 +62,24 @@ mca_ns_base_vpid_t ns_replica_reserve_range(mca_ns_base_jobid_t job, mca_ns_base
mca_ns_replica_name_tracker_t *ptr; mca_ns_replica_name_tracker_t *ptr;
mca_ns_base_vpid_t start; mca_ns_base_vpid_t start;
OMPI_THREAD_LOCK(&mca_ns_replica_mutex);
for (ptr = (mca_ns_replica_name_tracker_t*)ompi_list_get_first(&mca_ns_replica_name_tracker); for (ptr = (mca_ns_replica_name_tracker_t*)ompi_list_get_first(&mca_ns_replica_name_tracker);
ptr != (mca_ns_replica_name_tracker_t*)ompi_list_get_end(&mca_ns_replica_name_tracker); ptr != (mca_ns_replica_name_tracker_t*)ompi_list_get_end(&mca_ns_replica_name_tracker);
ptr = (mca_ns_replica_name_tracker_t*)ompi_list_get_next(ptr)) { ptr = (mca_ns_replica_name_tracker_t*)ompi_list_get_next(ptr)) {
if (job == ptr->job) { /* found the specified job */ if (job == ptr->job) { /* found the specified job */
if ((MCA_NS_BASE_VPID_MAX-range-2) >= ptr->last_used_vpid) { /* requested range available */ if ((MCA_NS_BASE_VPID_MAX-range-2) >= ptr->last_used_vpid) { /* requested range available */
start = ptr->last_used_vpid + 1; start = ptr->last_used_vpid;
ptr->last_used_vpid = ptr->last_used_vpid + range; if (0 == job && start == 0) { /* vpid=0 reserved for job=0 */
start = 1;
}
ptr->last_used_vpid = start + range;
OMPI_THREAD_UNLOCK(&mca_ns_replica_mutex);
return(start); return(start);
} }
} }
} }
OMPI_THREAD_UNLOCK(&mca_ns_replica_mutex);
return MCA_NS_BASE_VPID_MAX; return MCA_NS_BASE_VPID_MAX;
} }

Просмотреть файл

@ -9,6 +9,7 @@
#include "ompi_config.h" #include "ompi_config.h"
#include "include/types.h" #include "include/types.h"
#include "include/constants.h" #include "include/constants.h"
#include "threads/mutex.h"
#include "class/ompi_list.h" #include "class/ompi_list.h"
#include "mca/oob/oob.h" #include "mca/oob/oob.h"
#include "mca/ns/ns.h" #include "mca/ns/ns.h"
@ -34,6 +35,7 @@ extern mca_ns_base_cellid_t mca_ns_replica_last_used_cellid;
extern mca_ns_base_jobid_t mca_ns_replica_last_used_jobid; extern mca_ns_base_jobid_t mca_ns_replica_last_used_jobid;
extern ompi_list_t mca_ns_replica_name_tracker; extern ompi_list_t mca_ns_replica_name_tracker;
extern int mca_ns_replica_debug; extern int mca_ns_replica_debug;
extern ompi_mutex_t mca_ns_replica_mutex;
/* /*
* Module open / close * Module open / close

Просмотреть файл

@ -18,6 +18,7 @@
#include "ompi_config.h" #include "ompi_config.h"
#include "include/constants.h" #include "include/constants.h"
#include "threads/mutex.h"
#include "util/proc_info.h" #include "util/proc_info.h"
#include "util/output.h" #include "util/output.h"
#include "mca/mca.h" #include "mca/mca.h"
@ -102,6 +103,7 @@ mca_ns_base_cellid_t mca_ns_replica_last_used_cellid;
mca_ns_base_jobid_t mca_ns_replica_last_used_jobid; mca_ns_base_jobid_t mca_ns_replica_last_used_jobid;
ompi_list_t mca_ns_replica_name_tracker; ompi_list_t mca_ns_replica_name_tracker;
int mca_ns_replica_debug; int mca_ns_replica_debug;
ompi_mutex_t mca_ns_replica_mutex;
/* /*
* don't really need this function - could just put NULL in the above structure * don't really need this function - could just put NULL in the above structure

Просмотреть файл

@ -156,10 +156,10 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
/* /*
* Register my process info with my replica. * Register my process info with my replica.
*/ */
if (OMPI_SUCCESS != (ret = ompi_rte_register())) { /* if (OMPI_SUCCESS != (ret = ompi_rte_register())) { */
error = "ompi_rte_init: failed in ompi_rte_register()\n"; /* error = "ompi_rte_init: failed in ompi_rte_register()\n"; */
goto error; /* goto error; */
} /* } */
/* finalize the rte startup */ /* finalize the rte startup */

Просмотреть файл

@ -222,10 +222,10 @@ main(int argc, char *argv[])
/* /*
* Register my process info with my replica. * Register my process info with my replica.
*/ */
if (OMPI_SUCCESS != (ret = ompi_rte_register())) { /* if (OMPI_SUCCESS != (ret = ompi_rte_register())) { */
ompi_output(0, "ompi_rte_init: failed in ompi_rte_register()\n"); /* ompi_output(0, "ompi_rte_init: failed in ompi_rte_register()\n"); */
return ret; /* return ret; */
} /* } */
/* finalize the rte startup */ /* finalize the rte startup */
if (OMPI_SUCCESS != (ret = ompi_rte_init_finalstage(&multi_thread, if (OMPI_SUCCESS != (ret = ompi_rte_init_finalstage(&multi_thread,