diff --git a/ompi/class/ompi_circular_buffer_fifo.h b/ompi/class/ompi_circular_buffer_fifo.h index 21a10f6239..c360ff7054 100644 --- a/ompi/class/ompi_circular_buffer_fifo.h +++ b/ompi/class/ompi_circular_buffer_fifo.h @@ -18,6 +18,7 @@ #ifndef _OMPI_CIRCULAR_BUFFER_FIFO #define _OMPI_CIRCULAR_BUFFER_FIFO +#include /* for getpagesize() */ #include "ompi/constants.h" #include "opal/sys/cache.h" @@ -125,10 +126,10 @@ static inline int ompi_cb_fifo_size(ompi_cb_fifo_t *fifo) { * */ static inline int ompi_cb_fifo_init(int size_of_fifo, - int lazy_free_freq, int fifo_memory_locality_index, - int head_memory_locality_index, int tail_memory_locality_index, - ompi_cb_fifo_t *fifo, ptrdiff_t offset, - mca_mpool_base_module_t *memory_allocator) + int lazy_free_freq, + mca_mpool_base_module_t *head_mpool, + mca_mpool_base_module_t *tail_mpool, + ompi_cb_fifo_t *fifo, ptrdiff_t offset) { int i, size; char *buf; @@ -154,31 +155,21 @@ static inline int ompi_cb_fifo_init(int size_of_fifo, fifo->mask = (size - 1); /* allocate fifo array */ - buf = (char *) memory_allocator->mpool_alloc(memory_allocator, - sizeof(void *) * size + 2*CACHE_LINE_SIZE, CACHE_LINE_SIZE, 0, - NULL); + buf = (char *) tail_mpool->mpool_alloc(tail_mpool, + sizeof(void *) * size + CACHE_LINE_SIZE, getpagesize(), 0, NULL); if (NULL == buf) { return OMPI_ERR_OUT_OF_RESOURCE; } - fifo->queue = (volatile void**)(buf + 2*CACHE_LINE_SIZE); + fifo->queue = (volatile void**)(buf + CACHE_LINE_SIZE); /* buffer address in a receiver address space */ fifo->recv_queue = (volatile void**)((char*)fifo->queue - offset); /* initialize the queue entries */ for (i = 0; i < size; i++) { fifo->queue[i] = OMPI_CB_FREE; } + fifo->tail = (ompi_cb_fifo_ctl_t*)buf; - fifo->head = (ompi_cb_fifo_ctl_t*)buf; - /* head address in a receiver address space */ - fifo->recv_head = (ompi_cb_fifo_ctl_t*)((char*)fifo->head - offset); - fifo->tail = (ompi_cb_fifo_ctl_t*)(buf + CACHE_LINE_SIZE); - - /* initialize the head structure */ - opal_atomic_unlock(&(fifo->head->lock)); - fifo->head->fifo_index=0; - fifo->head->num_to_clear=0; - - /* initialize the head structure */ + /* initialize the tail structure */ opal_atomic_unlock(&(fifo->tail->lock)); fifo->tail->fifo_index=0; fifo->tail->num_to_clear=0; @@ -186,6 +177,18 @@ static inline int ompi_cb_fifo_init(int size_of_fifo, /* recalculate tail address in a receiver address space */ fifo->tail = (ompi_cb_fifo_ctl_t*)((char*)fifo->tail - offset); + fifo->head = (ompi_cb_fifo_ctl_t*)head_mpool->mpool_alloc(head_mpool, + sizeof(ompi_cb_fifo_ctl_t), getpagesize(), 0, NULL); + + /* head address in a receiver address space */ + fifo->recv_head = (ompi_cb_fifo_ctl_t*)((char*)fifo->head - offset); + + /* initialize the head structure */ + opal_atomic_unlock(&(fifo->head->lock)); + fifo->head->fifo_index=0; + fifo->head->num_to_clear=0; + + /* return */ return OMPI_SUCCESS; } diff --git a/ompi/class/ompi_fifo.h b/ompi/class/ompi_fifo.h index f5feec2100..0f1c4ee4d6 100644 --- a/ompi/class/ompi_fifo.h +++ b/ompi/class/ompi_fifo.h @@ -211,13 +211,13 @@ struct ompi_fifo_t { int cb_count; /* fifo memory locality index */ - int fifo_memory_locality_index; + mca_mpool_base_module_t *fifo_mpool; /* head memory locality index */ - int head_memory_locality_index; + mca_mpool_base_module_t *head_mpool; /* tail memory locality index */ - int tail_memory_locality_index; + mca_mpool_base_module_t *tail_mpool; /* offset between sender and receiver shared mapping */ ptrdiff_t offset; @@ -259,10 +259,11 @@ typedef struct ompi_fifo_t ompi_fifo_t; * */ static inline int ompi_fifo_init(int size_of_cb_fifo, - int lazy_free_freq, int cb_num_limit, int fifo_memory_locality_index, - int head_memory_locality_index, int tail_memory_locality_index, - ompi_fifo_t *fifo, ptrdiff_t offset, - mca_mpool_base_module_t *memory_allocator) + int lazy_free_freq, int cb_num_limit, + mca_mpool_base_module_t *fifo_mpool, + mca_mpool_base_module_t *head_mpool, + mca_mpool_base_module_t *tail_mpool, + ompi_fifo_t *fifo, ptrdiff_t offset) { int error_code; @@ -270,24 +271,23 @@ static inline int ompi_fifo_init(int size_of_cb_fifo, fifo->size = size_of_cb_fifo; /*we allocate one cb below so subtract one here */ fifo->cb_count = cb_num_limit - 1; - fifo->fifo_memory_locality_index = fifo_memory_locality_index; - fifo->head_memory_locality_index = head_memory_locality_index; - fifo->tail_memory_locality_index = tail_memory_locality_index; + fifo->fifo_mpool = fifo_mpool; + fifo->head_mpool = head_mpool; + fifo->tail_mpool = tail_mpool; /* allocate head ompi_cb_fifo_t structure and place for head and tail locks * on different cache lines */ - fifo->head = (ompi_cb_fifo_wrapper_t*)memory_allocator->mpool_alloc( - memory_allocator, sizeof(ompi_cb_fifo_wrapper_t), CACHE_LINE_SIZE, + fifo->head = (ompi_cb_fifo_wrapper_t*)fifo_mpool->mpool_alloc( + fifo_mpool, sizeof(ompi_cb_fifo_wrapper_t), getpagesize(), 0, NULL); if(NULL == fifo->head) { return OMPI_ERR_OUT_OF_RESOURCE; } /* initialize the circular buffer fifo head structure */ - error_code=ompi_cb_fifo_init(size_of_cb_fifo, - lazy_free_freq, fifo_memory_locality_index, - head_memory_locality_index, tail_memory_locality_index, - &(fifo->head->cb_fifo), offset, memory_allocator); + error_code = ompi_cb_fifo_init(size_of_cb_fifo, + lazy_free_freq, head_mpool, tail_mpool, &(fifo->head->cb_fifo), + offset); if ( OMPI_SUCCESS != error_code ) { return error_code; } @@ -314,8 +314,7 @@ static inline int ompi_fifo_init(int size_of_cb_fifo, * @returncode Slot index to which data is written * */ -static inline int ompi_fifo_write_to_head(void *data, - ompi_fifo_t *fifo, mca_mpool_base_module_t *fifo_allocator) +static inline int ompi_fifo_write_to_head(void *data, ompi_fifo_t *fifo) { int error_code; ompi_cb_fifo_wrapper_t *next_ff; @@ -343,7 +342,7 @@ static inline int ompi_fifo_write_to_head(void *data, /* We retry to write to the old head before creating new one just in * case consumer read all entries after first attempt failed, but * before we set cb_overflow to true */ - error_code=ompi_cb_fifo_write_to_head(data, &fifo->head->cb_fifo); + error_code = ompi_cb_fifo_write_to_head(data, &fifo->head->cb_fifo); if(error_code != OMPI_CB_ERROR) { fifo->head->cb_overflow = false; @@ -361,9 +360,10 @@ static inline int ompi_fifo_write_to_head(void *data, if(0 == fifo->cb_count) next_ff = NULL; else - next_ff = (ompi_cb_fifo_wrapper_t*)fifo_allocator->mpool_alloc( - fifo_allocator, sizeof(ompi_cb_fifo_wrapper_t), - CACHE_LINE_SIZE, 0, NULL); + next_ff = (ompi_cb_fifo_wrapper_t*) + fifo->fifo_mpool->mpool_alloc(fifo->fifo_mpool, + sizeof(ompi_cb_fifo_wrapper_t), getpagesize(), 0, + NULL); if (NULL == next_ff) { opal_atomic_unlock(&fifo->fifo_lock); return OMPI_ERR_OUT_OF_RESOURCE; @@ -372,12 +372,10 @@ static inline int ompi_fifo_write_to_head(void *data, /* initialize the circular buffer fifo head structure */ error_code = ompi_cb_fifo_init(fifo->size, fifo->head->cb_fifo.lazy_free_frequency, - fifo->fifo_memory_locality_index, - fifo->head_memory_locality_index, - fifo->tail_memory_locality_index, - &(next_ff->cb_fifo), fifo->offset, fifo_allocator); + fifo->head_mpool, fifo->tail_mpool, + &(next_ff->cb_fifo), fifo->offset); if (OMPI_SUCCESS != error_code) { - fifo_allocator->mpool_free(fifo_allocator, next_ff, NULL); + fifo->fifo_mpool->mpool_free(fifo->fifo_mpool, next_ff, NULL); opal_atomic_unlock(&fifo->fifo_lock); return error_code; } diff --git a/ompi/mca/btl/sm/btl_sm.c b/ompi/mca/btl/sm/btl_sm.c index 0bff6d51d2..4d5109cb8b 100644 --- a/ompi/mca/btl/sm/btl_sm.c +++ b/ompi/mca/btl/sm/btl_sm.c @@ -31,6 +31,10 @@ #include "opal/sys/atomic.h" #include "orte/util/show_help.h" #include "opal/util/if.h" +#include "opal/mca/carto/carto.h" +#include "opal/mca/carto/base/base.h" +#include "opal/mca/paffinity/base/base.h" +#include "opal/mca/maffinity/base/base.h" #include "orte/util/proc_info.h" #include "opal/util/printf.h" #include "ompi/class/ompi_fifo.h" @@ -39,6 +43,7 @@ #include "ompi/mca/btl/btl.h" #include "ompi/mca/mpool/base/base.h" #include "ompi/mca/common/sm/common_sm_mmap.h" +#include "ompi/mca/mpool/sm/mpool_sm.h" #include "btl_sm.h" #include "btl_sm_endpoint.h" #include "btl_sm_frag.h" @@ -93,9 +98,10 @@ static void *mpool_calloc(size_t nmemb, size_t size) { void *buf; size_t bsize = nmemb * size; + mca_mpool_base_module_t *mpool = mca_btl_sm_component.sm_mpool; + + buf = mpool->mpool_alloc(mpool, bsize, CACHE_LINE_SIZE, 0, NULL); - buf = mca_btl_sm_component.sm_mpool->mpool_alloc( - mca_btl_sm_component.sm_mpool, bsize, CACHE_LINE_SIZE, 0, NULL); if (NULL == buf) return NULL; @@ -128,27 +134,91 @@ static int init_fifos(ompi_fifo_t *f, int n) return OMPI_SUCCESS; } +static void init_maffinity(int *my_mem_node, int *max_mem_node) +{ + static opal_carto_graph_t *topo; + opal_value_array_t dists; + int i, num_core, max_core, socket, rc; + opal_paffinity_base_cpu_set_t cpus; + char *myslot = NULL; + opal_carto_node_distance_t *dist; + opal_carto_base_node_t *slot_node; + + *my_mem_node = 0; + *max_mem_node = 1; + + if(opal_carto_base_get_host_graph(&topo, "Memory") != OMPI_SUCCESS) + return; + + OBJ_CONSTRUCT(&dists, opal_value_array_t); + opal_value_array_init(&dists, sizeof(opal_carto_node_distance_t)); + + if(opal_paffinity_base_get_processor_info(&num_core, &max_core) != + OMPI_SUCCESS) + max_core = 100; + + OPAL_PAFFINITY_CPU_ZERO(cpus); + opal_paffinity_base_get(&cpus); + + /* find core we are running on */ + for(i = 0; i < max_core; i++) + if(OPAL_PAFFINITY_CPU_ISSET(i, cpus)) + break; + + rc = opal_paffinity_base_map_to_socket_core(i, &socket, &i); + asprintf(&myslot, "slot%d", socket); + + slot_node = opal_carto_base_find_node(topo, myslot); + + if(NULL == slot_node) + goto out; + + opal_carto_base_get_nodes_distance(topo, slot_node, "Memory", &dists); + if((*max_mem_node = opal_value_array_get_size(&dists)) < 2) + goto out; + + dist = opal_value_array_get_item(&dists, 0); + opal_maffinity_base_node_name_to_id(dist->node->node_name, my_mem_node); +out: + if(myslot) free(myslot); + OBJ_DESTRUCT(&dists); + opal_carto_base_free_graph(topo); +} + static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n) { size_t size, length, length_payload; char *sm_ctl_file; ompi_fifo_t *my_fifos; + int my_mem_node=-1, num_mem_nodes=-1, i; + + init_maffinity(&my_mem_node, &num_mem_nodes); + mca_btl_sm_component.mem_node = my_mem_node; + mca_btl_sm_component.num_mem_nodes = num_mem_nodes; /* lookup shared memory pool */ - mca_btl_sm_component.sm_mpool = - mca_mpool_base_module_lookup(mca_btl_sm_component.sm_mpool_name); - if(NULL == mca_btl_sm_component.sm_mpool) { - mca_btl_sm_component.sm_mpool = + mca_btl_sm_component.sm_mpools = calloc(num_mem_nodes, + sizeof(mca_mpool_base_module_t*)); + + /* create mpool for each memory node */ + for(i = 0; i < num_mem_nodes; i++) { + mca_mpool_base_resources_t res; + /* disable memory binding if there is only one memory node */ + res.mem_node = (num_mem_nodes == 1) ? -1 : i; + mca_btl_sm_component.sm_mpools[i] = mca_mpool_base_module_create(mca_btl_sm_component.sm_mpool_name, - sm_btl, NULL); + sm_btl, &res); + /* Sanity check to ensure that we found it */ + if(NULL == mca_btl_sm_component.sm_mpools[i]) + return OMPI_ERR_OUT_OF_RESOURCE; + + if(i == my_mem_node) + mca_btl_sm_component.sm_mpool = mca_btl_sm_component.sm_mpools[i]; } - /* Sanity check to ensure that we found it */ - if(NULL == mca_btl_sm_component.sm_mpool) - return OMPI_ERR_OUT_OF_RESOURCE; mca_btl_sm_component.sm_mpool_base = - mca_btl_sm_component.sm_mpool->mpool_base(mca_btl_sm_component.sm_mpool); + mca_btl_sm_component.sm_mpools[0]->mpool_base(mca_btl_sm_component.sm_mpools[0]); /* set the shared memory offset */ mca_btl_sm_component.sm_offset = (ptrdiff_t*)calloc(n, sizeof(ptrdiff_t)); @@ -173,7 +243,7 @@ static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n) /* Pass in a data segment alignment of 0 to get no data segment (only the shared control structure) */ size = sizeof(mca_common_sm_file_header_t) + - n * (sizeof(ompi_fifo_t*) + sizeof(char *)) + CACHE_LINE_SIZE; + n * (sizeof(ompi_fifo_t*) + sizeof(char *) + sizeof(uint16_t)) + CACHE_LINE_SIZE; if(!(mca_btl_sm_component.mmap_file = mca_common_sm_mmap_init(size, sm_ctl_file, sizeof(mca_common_sm_file_header_t), @@ -202,6 +272,7 @@ static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n) mca_btl_sm_component.shm_fifo = (ompi_fifo_t **)mca_btl_sm_component.mmap_file->data_addr; mca_btl_sm_component.shm_bases = (char**)(mca_btl_sm_component.shm_fifo + n); + mca_btl_sm_component.shm_mem_nodes = (uint16_t*)(mca_btl_sm_component.shm_bases + n); /* Sync with other local procs. (Do we have to?) */ if(0 == mca_btl_sm_component.my_smp_rank) { @@ -220,6 +291,8 @@ static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n) /* set the base of the shared memory segment */ mca_btl_sm_component.shm_bases[mca_btl_sm_component.my_smp_rank] = (char*)mca_btl_sm_component.sm_mpool_base; + mca_btl_sm_component.shm_mem_nodes[mca_btl_sm_component.my_smp_rank] = + (uint16_t)my_mem_node; /* * initialize the array of fifo's "owned" by this process @@ -247,6 +320,10 @@ static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n) mca_btl_sm_component.fifo[mca_btl_sm_component.my_smp_rank] = my_fifos; + mca_btl_sm_component.mem_nodes = malloc(sizeof(uint16_t) * n); + if(NULL == mca_btl_sm_component.mem_nodes) + return OMPI_ERR_OUT_OF_RESOURCE; + /* initialize fragment descriptor free lists */ /* allocation will be for the fragment descriptor and payload buffer */ @@ -418,6 +495,7 @@ int mca_btl_sm_add_procs( for(j = mca_btl_sm_component.num_smp_procs; j < mca_btl_sm_component.num_smp_procs + n_local_procs; j++) { ptrdiff_t diff; + int peer_mem_node; if(j == my_smp_rank) continue; @@ -443,14 +521,21 @@ int mca_btl_sm_add_procs( (opal_atomic_lock_t*)OFFSET2ADDR(diff, mca_btl_sm_component.fifo[j][my_smp_rank].head_lock); } + /* cache local copy of peer memory node number */ + peer_mem_node = mca_btl_sm_component.mem_nodes[j] = mca_btl_sm_component.shm_mem_nodes[j]; + /* Initialize fifo for use. Note that sender does initialization */ return_code = ompi_fifo_init(mca_btl_sm_component.size_of_cb_queue, mca_btl_sm_component.cb_lazy_free_freq, mca_btl_sm_component.cb_max_num, - 0,0,0, + /* fifo mpool */ + mca_btl_sm_component.sm_mpools[peer_mem_node], + /* head mpool */ + mca_btl_sm_component.sm_mpool, + /* tail mpool */ + mca_btl_sm_component.sm_mpools[peer_mem_node], &mca_btl_sm_component.fifo[j][my_smp_rank], - mca_btl_sm_component.sm_offset[j], - mca_btl_sm_component.sm_mpool); + mca_btl_sm_component.sm_offset[j]); if(return_code != OMPI_SUCCESS) goto CLEANUP; diff --git a/ompi/mca/btl/sm/btl_sm.h b/ompi/mca/btl/sm/btl_sm.h index eb6cbc3b29..9edb42c84f 100644 --- a/ompi/mca/btl/sm/btl_sm.h +++ b/ompi/mca/btl/sm/btl_sm.h @@ -48,6 +48,8 @@ #include "ompi/mca/mpool/mpool.h" #include "ompi/mca/common/sm/common_sm_mmap.h" +#include "opal/mca/maffinity/base/base.h" + #if defined(c_plusplus) || defined(__cplusplus) extern "C" { #endif @@ -61,6 +63,10 @@ extern "C" { #define DONE (char)1 #endif +typedef struct mca_btl_sm_mem_node_t { + mca_mpool_base_module_t* sm_mpool; /**< shared memory pool */ +} mca_btl_sm_mem_node_t; + /** * Shared Memory (SM) BTL module. */ @@ -72,7 +78,8 @@ struct mca_btl_sm_component_t { int32_t sm_max_procs; /**< upper limit on the number of processes using the shared memory pool */ int sm_extra_procs; /**< number of extra procs to allow */ char* sm_mpool_name; /**< name of shared memory pool module */ - mca_mpool_base_module_t* sm_mpool; /**< shared memory pool */ + mca_mpool_base_module_t **sm_mpools; /**< shared memory pools (one for each memory node */ + mca_mpool_base_module_t *sm_mpool; /**< mpool on local node */ void* sm_mpool_base; /**< base address of shared memory pool */ size_t eager_limit; /**< first fragment size */ size_t max_frag_size; /**< maximum (second and beyone) fragment size */ @@ -82,11 +89,13 @@ struct mca_btl_sm_component_t { shared memory */ ompi_fifo_t **shm_fifo; /**< pointer to fifo 2D array in shared memory */ char **shm_bases; /**< pointer to base pointers in shared memory */ + uint16_t *shm_mem_nodes; /**< pointer to mem noded in shared memory */ ompi_fifo_t **fifo; /**< cached copy of the pointer to the 2D fifo array. The address in the shared memory segment sm_ctl_header is a relative, but this one, in process private memory, is a real virtual address */ + uint16_t *mem_nodes; /**< cached copy of mem nodes of each local rank */ size_t size_of_cb_queue; /**< size of each circular buffer queue array */ size_t cb_lazy_free_freq; /**< frequency of lazy free */ int cb_max_num; /**< max number of circular buffers for each peer */ @@ -103,7 +112,9 @@ struct mca_btl_sm_component_t { struct mca_btl_base_endpoint_t **sm_peers; opal_free_list_t pending_send_fl; - + int mem_node; + int num_mem_nodes; + #if OMPI_ENABLE_PROGRESS_THREADS == 1 char sm_fifo_path[PATH_MAX]; /**< path to fifo used to signal this process */ int sm_fifo_fd; /**< file descriptor corresponding to opened fifo */ diff --git a/ompi/mca/btl/sm/btl_sm_fifo.h b/ompi/mca/btl/sm/btl_sm_fifo.h index 4d5a29f4cf..3208f117ae 100644 --- a/ompi/mca/btl/sm/btl_sm_fifo.h +++ b/ompi/mca/btl/sm/btl_sm_fifo.h @@ -14,7 +14,7 @@ do { \ if(opal_using_threads()) \ opal_atomic_lock(fifo->head_lock); \ /* post fragment */ \ - if(ompi_fifo_write_to_head(hdr, fifo, mca_btl_sm_component.sm_mpool) \ + if(ompi_fifo_write_to_head(hdr, fifo) \ != OMPI_SUCCESS) { \ btl_sm_add_pending(endpoint_peer, hdr, resend); \ rc = OMPI_ERR_RESOURCE_BUSY; \ diff --git a/ompi/mca/mpool/sm/mpool_sm.h b/ompi/mca/mpool/sm/mpool_sm.h index 71eacb5335..3bb30f120c 100644 --- a/ompi/mca/mpool/sm/mpool_sm.h +++ b/ompi/mca/mpool/sm/mpool_sm.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @@ -42,18 +42,23 @@ struct mca_mpool_sm_component_t { }; typedef struct mca_mpool_sm_component_t mca_mpool_sm_component_t; +typedef struct mca_mpool_base_resources_t { + int32_t mem_node; +} mca_mpool_base_resources_t; + OMPI_MODULE_DECLSPEC extern mca_mpool_sm_component_t mca_mpool_sm_component; -struct mca_mpool_sm_module_t { - mca_mpool_base_module_t super; - mca_allocator_base_module_t * sm_allocator; - struct mca_mpool_sm_mmap_t *sm_mmap; -}; typedef struct mca_mpool_sm_module_t mca_mpool_sm_module_t; +typedef struct mca_mpool_sm_module_t { + mca_mpool_base_module_t super; + mca_allocator_base_module_t * sm_allocator; + struct mca_mpool_sm_mmap_t *sm_mmap; + int32_t mem_node; +} mca_mpool_sm_module_t; -/* - * Initializes the mpool module. - */ -void mca_mpool_sm_module_init(mca_mpool_sm_module_t* mpool); +/* + * Initializes the mpool module. + */ +void mca_mpool_sm_module_init(mca_mpool_sm_module_t* mpool); /* @@ -64,10 +69,10 @@ void* mca_mpool_sm_base(mca_mpool_base_module_t*); /** * Allocate block of shared memory. */ -void* mca_mpool_sm_alloc( - mca_mpool_base_module_t* mpool, - size_t size, - size_t align, +void* mca_mpool_sm_alloc( + mca_mpool_base_module_t* mpool, + size_t size, + size_t align, uint32_t flags, mca_mpool_base_registration_t** registration); @@ -75,17 +80,17 @@ void* mca_mpool_sm_alloc( * realloc function typedef */ void* mca_mpool_sm_realloc( - mca_mpool_base_module_t* mpool, - void* addr, - size_t size, + mca_mpool_base_module_t* mpool, + void* addr, + size_t size, mca_mpool_base_registration_t** registration); /** * free function typedef */ void mca_mpool_sm_free( - mca_mpool_base_module_t* mpool, - void * addr, + mca_mpool_base_module_t* mpool, + void * addr, mca_mpool_base_registration_t* registration); /** diff --git a/ompi/mca/mpool/sm/mpool_sm_component.c b/ompi/mca/mpool/sm/mpool_sm_component.c index f78bd3801f..3401cb393b 100644 --- a/ompi/mca/mpool/sm/mpool_sm_component.c +++ b/ompi/mca/mpool/sm/mpool_sm_component.c @@ -5,15 +5,15 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -65,7 +65,7 @@ mca_mpool_sm_component_t mca_mpool_sm_component = { }, /* Next the MCA v1.0.0 component meta data */ - + { /* The component is not checkpoint ready */ MCA_BASE_METADATA_PARAM_NONE @@ -89,7 +89,7 @@ static int mca_mpool_sm_open(void) default_max = 512*1024*1024; default_min = 128*1024*1024; default_peer = 32*1024*1024; - + /* register SM component parameters */ mca_base_param_reg_string(&mca_mpool_sm_component.super.mpool_version, "allocator", @@ -151,12 +151,12 @@ static mca_mpool_base_module_t* mca_mpool_sm_init( { char *file_name; int len; - mca_mpool_sm_module_t* mpool_module; + mca_mpool_sm_module_t* mpool_module; mca_allocator_base_component_t* allocator_component; long max_size, min_size, peer_size; ompi_proc_t **procs; size_t num_all_procs, i, num_local_procs = 0; - + /* README: this needs to change if procs in different jobs (even spawned ones) are to talk using shared memory */ procs = ompi_proc_world(&num_all_procs); @@ -166,9 +166,9 @@ static mca_mpool_base_module_t* mca_mpool_sm_init( } } free(procs); - + /* parse the max, min and peer sizes, and validate them */ - /* absolutely necessary to reset errno each time */ + /* absolutely necessary to reset errno each time */ errno = 0; max_size = strtol(max_size_param, (char **)NULL, 10); if (errno == ERANGE) { @@ -178,7 +178,7 @@ static mca_mpool_base_module_t* mca_mpool_sm_init( opal_output(0, "mca_mpool_sm_init: invalid max_size entered. set it to (%ld)", default_max); max_size = default_max; } - + errno = 0; min_size = strtol(min_size_param, (char **)NULL, 10); if (errno == ERANGE) { @@ -246,8 +246,8 @@ static mca_mpool_base_module_t* mca_mpool_sm_init( } } - mpool_module = (mca_mpool_sm_module_t*)malloc(sizeof(mca_mpool_sm_module_t)); - mca_mpool_sm_module_init(mpool_module); + mpool_module = (mca_mpool_sm_module_t*)malloc(sizeof(mca_mpool_sm_module_t)); + mca_mpool_sm_module_init(mpool_module); /* create initial shared memory mapping */ len = asprintf( &file_name, "%s"OPAL_PATH_SEP"shared_mem_pool.%s", diff --git a/ompi/mca/mpool/sm/mpool_sm_module.c b/ompi/mca/mpool/sm/mpool_sm_module.c index 5da0451aa6..560e9993d2 100644 --- a/ompi/mca/mpool/sm/mpool_sm_module.c +++ b/ompi/mca/mpool/sm/mpool_sm_module.c @@ -21,6 +21,11 @@ #include "orte/util/show_help.h" #include "ompi/mca/mpool/sm/mpool_sm.h" #include "ompi/mca/common/sm/common_sm_mmap.h" +#include +#include "opal/include/opal/align.h" +#include "opal/mca/maffinity/maffinity.h" +#include "opal/mca/maffinity/maffinity_types.h" +#include "opal/mca/maffinity/base/base.h" /* @@ -51,39 +56,60 @@ void* mca_mpool_sm_base(mca_mpool_base_module_t* mpool) } /** - * allocate function + * allocate function */ void* mca_mpool_sm_alloc( - mca_mpool_base_module_t* mpool, - size_t size, - size_t align, + mca_mpool_base_module_t* mpool, + size_t size, + size_t align, uint32_t flags, mca_mpool_base_registration_t** registration) { - mca_mpool_sm_module_t* mpool_sm = (mca_mpool_sm_module_t*)mpool; - return mpool_sm->sm_allocator->alc_alloc(mpool_sm->sm_allocator, size, align, registration); + mca_mpool_sm_module_t* mpool_sm = (mca_mpool_sm_module_t*)mpool; + opal_maffinity_base_segment_t mseg; + + mseg.mbs_start_addr = + mpool_sm->sm_allocator->alc_alloc(mpool_sm->sm_allocator, size, + OPAL_ALIGN(align, getpagesize(), size_t), registration); + + if(mpool_sm->mem_node >= 0) { + mseg.mbs_len = size; + opal_maffinity_base_bind(&mseg, 1, mpool_sm->mem_node); + } + + return mseg.mbs_start_addr; } /** - * realloc function + * realloc function */ void* mca_mpool_sm_realloc( - mca_mpool_base_module_t* mpool, - void* addr, - size_t size, + mca_mpool_base_module_t* mpool, + void* addr, + size_t size, mca_mpool_base_registration_t** registration) { - mca_mpool_sm_module_t* mpool_sm = (mca_mpool_sm_module_t*)mpool; - return mpool_sm->sm_allocator->alc_realloc(mpool_sm->sm_allocator, addr, size, registration); + mca_mpool_sm_module_t* mpool_sm = (mca_mpool_sm_module_t*)mpool; + opal_maffinity_base_segment_t mseg; + + mseg.mbs_start_addr = + mpool_sm->sm_allocator->alc_realloc(mpool_sm->sm_allocator, addr, size, + registration); + if(mpool_sm->mem_node >= 0) { + mseg.mbs_len = size; + opal_maffinity_base_bind(&mseg, 1, mpool_sm->mem_node); + } + + return mseg.mbs_start_addr; } /** - * free function + * free function */ -void mca_mpool_sm_free(mca_mpool_base_module_t* mpool, void * addr, +void mca_mpool_sm_free(mca_mpool_base_module_t* mpool, void * addr, mca_mpool_base_registration_t* registration) { - mca_mpool_sm_module_t* mpool_sm = (mca_mpool_sm_module_t*)mpool; + mca_mpool_sm_module_t* mpool_sm = (mca_mpool_sm_module_t*)mpool; mpool_sm->sm_allocator->alc_free(mpool_sm->sm_allocator, addr); }