Adding code to figure out which processes are on the same
host, and to limit the number of procs that can use a given shared memory region. This commit was SVN r1230.
Этот коммит содержится в:
родитель
809f4413f4
Коммит
fc9c512ba8
@ -45,7 +45,41 @@ int mca_ptl_sm_add_procs(
|
|||||||
struct mca_ptl_base_peer_t **peers,
|
struct mca_ptl_base_peer_t **peers,
|
||||||
ompi_bitmap_t* reachability)
|
ompi_bitmap_t* reachability)
|
||||||
{
|
{
|
||||||
|
int proc,rc;
|
||||||
|
size_t size;
|
||||||
|
mca_ptl_sm_exchange_t **sm_proc_info;
|
||||||
|
|
||||||
|
/* allocate array to hold setup shared memory from all
|
||||||
|
* other procs */
|
||||||
|
sm_proc_info=(mca_ptl_sm_exchange_t **)
|
||||||
|
malloc(nprocs*sizeof(mca_ptl_sm_exchange_t *));
|
||||||
|
if( NULL == sm_proc_info ){
|
||||||
|
rc=OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
|
goto CLEANUP;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* get unique host identifier for each process in the list */
|
||||||
|
for( proc=0 ; proc < nprocs; proc++ ) {
|
||||||
|
rc = mca_base_modex_recv(
|
||||||
|
&mca_ptl_sm_module.super.ptlm_version, proc,
|
||||||
|
(void**)(sm_proc_info+proc), &size);
|
||||||
|
if(rc != OMPI_SUCCESS) {
|
||||||
|
ompi_output(0, "mca_ptl_sm_add_procs: mca_base_modex_recv: failed with return value=%d", rc);
|
||||||
|
goto CLEANUP;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* free local memory */
|
||||||
|
if(sm_proc_info){
|
||||||
|
free(sm_proc_info);
|
||||||
|
}
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
|
|
||||||
|
CLEANUP:
|
||||||
|
if(sm_proc_info){
|
||||||
|
free(sm_proc_info);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -27,6 +27,7 @@ struct mca_ptl_sm_module_1_0_0_t {
|
|||||||
int sm_free_list_num; /**< initial size of free lists */
|
int sm_free_list_num; /**< initial size of free lists */
|
||||||
int sm_free_list_max; /**< maximum size of free lists */
|
int sm_free_list_max; /**< maximum size of free lists */
|
||||||
int sm_free_list_inc; /**< number of elements to alloc when growing free lists */
|
int sm_free_list_inc; /**< number of elements to alloc when growing free lists */
|
||||||
|
int sm_max_procs; /**< upper limit on the number of processes using the shared memory pool */
|
||||||
void* sm_base_addr; /**< base address of mmaped region */
|
void* sm_base_addr; /**< base address of mmaped region */
|
||||||
ompi_free_list_t sm_send_requests; /**< free list of sm send requests -- sendreq + sendfrag */
|
ompi_free_list_t sm_send_requests; /**< free list of sm send requests -- sendreq + sendfrag */
|
||||||
ompi_free_list_t sm_send_frags; /**< free list of sm send fragments */
|
ompi_free_list_t sm_send_frags; /**< free list of sm send fragments */
|
||||||
@ -194,5 +195,15 @@ extern int mca_ptl_sm_send(
|
|||||||
int flags
|
int flags
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/**
 * Data structure used to hold the information that is exchanged with
 * all other procs at startup.  It carries a single field, host_name,
 * which the sender fills with its node name as a NUL-terminated string;
 * MCA_PTL_SM_MAX_HOSTNAME_LEN is the size of that buffer and therefore
 * has to accommodate the terminator as well.
 *
 * NOTE: this is only temporary, until the registry is complete.
 */
|
||||||
|
#define MCA_PTL_SM_MAX_HOSTNAME_LEN 128
|
||||||
|
typedef struct mca_ptl_sm_exchange{
|
||||||
|
char host_name[MCA_PTL_SM_MAX_HOSTNAME_LEN];
|
||||||
|
}mca_ptl_sm_exchange_t;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -30,7 +30,7 @@ static mca_ptl_sm_mmap_t* mca_ptl_sm_mmap_open(size_t size)
|
|||||||
|
|
||||||
while(fd < 0) {
|
while(fd < 0) {
|
||||||
struct timespec ts;
|
struct timespec ts;
|
||||||
fd = shm_open(mca_ptl_sm_module.sm_mmap_file, O_CREAT|O_RDWR, 0000);
|
/*fd = shm_open(mca_ptl_sm_module.sm_mmap_file, O_CREAT|O_RDWR, 0000); */
|
||||||
if(fd < 0 && errno != EACCES) {
|
if(fd < 0 && errno != EACCES) {
|
||||||
ompi_output(0, "mca_ptl_sm_mmap_open: open failed with errno=%d\n", errno);
|
ompi_output(0, "mca_ptl_sm_mmap_open: open failed with errno=%d\n", errno);
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -61,6 +61,9 @@ mca_ptl_sm_mmap_t* mca_ptl_sm_mmap_init(size_t size)
|
|||||||
{
|
{
|
||||||
static int segnum = 0;
|
static int segnum = 0;
|
||||||
|
|
||||||
|
/* debug !!!!! */
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
|
||||||
ompi_job_handle_t job_handle = mca_pcm.pcm_handle_get();
|
ompi_job_handle_t job_handle = mca_pcm.pcm_handle_get();
|
||||||
char hostname[64];
|
char hostname[64];
|
||||||
int fd;
|
int fd;
|
||||||
@ -69,7 +72,7 @@ mca_ptl_sm_mmap_t* mca_ptl_sm_mmap_init(size_t size)
|
|||||||
|
|
||||||
gethostname(hostname, sizeof(hostname));
|
gethostname(hostname, sizeof(hostname));
|
||||||
sprintf(mca_ptl_sm_module.sm_mmap_file, "/%s.%s.%d", hostname, job_handle, segnum++);
|
sprintf(mca_ptl_sm_module.sm_mmap_file, "/%s.%s.%d", hostname, job_handle, segnum++);
|
||||||
fd = shm_open(mca_ptl_sm_module.sm_mmap_file, O_CREAT|O_RDWR, 0000);
|
/*fd = shm_open(mca_ptl_sm_module.sm_mmap_file, O_CREAT|O_RDWR, 0000); */
|
||||||
if(fd < 0) {
|
if(fd < 0) {
|
||||||
if(errno == EACCES)
|
if(errno == EACCES)
|
||||||
return mca_ptl_sm_mmap_open(size);
|
return mca_ptl_sm_mmap_open(size);
|
||||||
|
@ -13,11 +13,13 @@
|
|||||||
#include "util/if.h"
|
#include "util/if.h"
|
||||||
#include "util/argv.h"
|
#include "util/argv.h"
|
||||||
#include "util/output.h"
|
#include "util/output.h"
|
||||||
|
#include "util/sys_info.h"
|
||||||
#include "mca/pml/pml.h"
|
#include "mca/pml/pml.h"
|
||||||
#include "mca/ptl/ptl.h"
|
#include "mca/ptl/ptl.h"
|
||||||
#include "mca/ptl/base/ptl_base_sendreq.h"
|
#include "mca/ptl/base/ptl_base_sendreq.h"
|
||||||
#include "mca/base/mca_base_param.h"
|
#include "mca/base/mca_base_param.h"
|
||||||
#include "mca/base/mca_base_module_exchange.h"
|
#include "mca/base/mca_base_module_exchange.h"
|
||||||
|
#include "mca/ptl/sm/src/ptl_sm.h"
|
||||||
#include "ptl_sm.h"
|
#include "ptl_sm.h"
|
||||||
#include "ptl_sm_sendreq.h"
|
#include "ptl_sm_sendreq.h"
|
||||||
#include "ptl_sm_sendfrag.h"
|
#include "ptl_sm_sendfrag.h"
|
||||||
@ -114,6 +116,8 @@ int mca_ptl_sm_module_open(void)
|
|||||||
mca_ptl_sm_param_register_int("free_list_max", -1);
|
mca_ptl_sm_param_register_int("free_list_max", -1);
|
||||||
mca_ptl_sm_module.sm_free_list_inc =
|
mca_ptl_sm_module.sm_free_list_inc =
|
||||||
mca_ptl_sm_param_register_int("free_list_inc", 256);
|
mca_ptl_sm_param_register_int("free_list_inc", 256);
|
||||||
|
mca_ptl_sm_module.sm_max_procs =
|
||||||
|
mca_ptl_sm_param_register_int("max_procs", -1);
|
||||||
|
|
||||||
/* initialize objects */
|
/* initialize objects */
|
||||||
OBJ_CONSTRUCT(&mca_ptl_sm_module.sm_lock, ompi_mutex_t);
|
OBJ_CONSTRUCT(&mca_ptl_sm_module.sm_lock, ompi_mutex_t);
|
||||||
@ -226,7 +230,40 @@ int mca_ptl_sm_module_progress(mca_ptl_tstamp_t tstamp)
|
|||||||
|
|
||||||
static int mca_ptl_sm_module_exchange()
|
static int mca_ptl_sm_module_exchange()
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* !!!! This is temporary, and will be removed when the
|
||||||
|
* registry is implemented
|
||||||
|
*/
|
||||||
|
mca_ptl_sm_exchange_t mca_ptl_sm_setup_info;
|
||||||
|
size_t len,size;
|
||||||
|
char *ptr;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
/* determine length of host name */
|
||||||
|
len=strlen(ompi_system_info.nodename);
|
||||||
|
/* check if string is zero length or there is an error */
|
||||||
|
if( 0 >= len) {
|
||||||
|
return OMPI_ERROR;
|
||||||
|
}
|
||||||
|
/* check if string is too long */
|
||||||
|
if( MCA_PTL_SM_MAX_HOSTNAME_LEN < (len+1) ){
|
||||||
|
return OMPI_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* copy string into structure that will be used to send data around */
|
||||||
|
ptr=NULL;
|
||||||
|
ptr=strncpy(&(mca_ptl_sm_setup_info.host_name[0]),
|
||||||
|
ompi_system_info.nodename, len);
|
||||||
|
if( NULL == ptr ) {
|
||||||
|
return OMPI_ERROR;
|
||||||
|
}
|
||||||
|
mca_ptl_sm_setup_info.host_name[len]='\0';
|
||||||
|
|
||||||
|
/* exchange setup information */
|
||||||
|
size=sizeof(mca_ptl_sm_exchange_t);
|
||||||
|
rc = mca_base_modex_send(&mca_ptl_sm_module.super.ptlm_version,
|
||||||
|
&mca_ptl_sm_setup_info, size);
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user