First commit of the multi-threaded GM driver.

- special function for thread - now work on all thread models ... - cleanup the initialization and finalization parts This commit was SVN r3447.
2004-10-30 06:55:18 +00:00 · 2004-10-30 06:55:18 +00:00 · d08e56f3d1
--- a/src/mca/ptl/gm/src/ptl_gm.c
+++ b/src/mca/ptl/gm/src/ptl_gm.c
@ -36,7 +36,6 @@ mca_ptl_gm_module_t mca_ptl_gm_module = {
 	0, /* latency */
 	0, /* bandwidth */
 	MCA_PTL_PUT,  /* ptl flags */
-
 	/* collection of interfaces */
 	mca_ptl_gm_add_procs,
 	mca_ptl_gm_del_procs,
@ -64,8 +63,7 @@ mca_ptl_gm_add_procs (struct mca_ptl_base_module_t *ptl,
                      struct mca_ptl_base_peer_t **peers,
                      ompi_bitmap_t * reachable)
 {
-    int         i,j;
-    int num_peer_ptls = 1;
+    uint32_t i, j, num_peer_ptls = 1;
    struct ompi_proc_t *ompi_proc;
    mca_ptl_gm_proc_t *ptl_proc;
    mca_ptl_gm_peer_t *ptl_peer;
@ -102,7 +100,7 @@ mca_ptl_gm_add_procs (struct mca_ptl_base_module_t *ptl,
            ptl_peer->global_id = ptl_proc->proc_addrs->global_id;
            ptl_peer->port_number = ptl_proc->proc_addrs->port_id;
            if (GM_SUCCESS !=
-                gm_global_id_to_node_id (((mca_ptl_gm_module_t *) ptl)->my_port,
+                gm_global_id_to_node_id (((mca_ptl_gm_module_t *) ptl)->gm_port,
                                         ptl_proc->proc_addrs[j].global_id,
                                         &lid)) {
                ompi_output (0,
@ -147,9 +145,33 @@ mca_ptl_gm_del_procs (struct mca_ptl_base_module_t *ptl,
 *
 */
 int
-mca_ptl_gm_finalize (struct mca_ptl_base_module_t *ptl)
+mca_ptl_gm_finalize (struct mca_ptl_base_module_t *base_ptl)
 {
-    free (ptl);
+    void* thread_return;
+    uint32_t index;
+    mca_ptl_gm_module_t* ptl = (mca_ptl_gm_module_t*)base_ptl;
+
+    /* we should do the same things as in the init step in reverse order.
+     * First we shutdown all threads if there are any.
+     */
+    if( 0 != ptl->thread.t_handle ) {
+	pthread_cancel( ptl->thread.t_handle );
+	ompi_thread_join( &(ptl->thread), &thread_return );
+    }
+    for( index = 0; index < mca_ptl_gm_component.gm_num_ptl_modules; index++ ) {
+	if( mca_ptl_gm_component.gm_ptl_modules[index] == ptl ) {
+	    mca_ptl_gm_component.gm_ptl_modules[index] = NULL;
+	    OMPI_OUTPUT((0, "GM ptl %p succesfully finalized\n", (void*)ptl));
+	}
+    }
+
+    /* based on the fact that the port 0 is reserved, we can use ZERO
+     * to mark a port as unused.
+     */
+    if( 0 != ptl->gm_port ) gm_close( ptl->gm_port );
+    ptl->gm_port = 0;
+    free( ptl );
+
    return OMPI_SUCCESS;
 }

@ -201,7 +223,6 @@ mca_ptl_gm_request_fini (struct mca_ptl_base_module_t *ptl,
    frag->status = 0;
 #endif

-    A_PRINT("entering request fini\n");
    OBJ_DESTRUCT(request+1);
 }

@ -287,7 +308,7 @@ mca_ptl_gm_put (struct mca_ptl_base_module_t *ptl,

   /* register the user buffer */
   if (offset > 0) {
-       status = gm_register_memory(gm_ptl->my_port, buffer_ptr, bytes_reg);
+       status = gm_register_memory(gm_ptl->gm_port, buffer_ptr, bytes_reg);
       if(GM_SUCCESS != status) {
          ompi_output(0,"[%s:%d] Unable to register memory\n",__FILE__,__LINE__);
       } 
@ -317,7 +338,6 @@ mca_ptl_gm_put (struct mca_ptl_base_module_t *ptl,
   rc = mca_ptl_gm_peer_send (putfrag->peer,putfrag,sendreq,
                                offset,&size,flags);
   assert(rc == 0);
-   A_PRINT("after issuing the put completion(fin) for the request");
 #endif

   return OMPI_SUCCESS;
@ -387,7 +407,7 @@ mca_ptl_gm_matched( mca_ptl_base_module_t * ptl,
             bytes_recv = frag->frag_base.frag_size; 
             bytes_reg = total_bytes - bytes_recv;
             buffer_ptr += bytes_recv;
-             status = gm_register_memory(gm_ptl->my_port, buffer_ptr, bytes_reg);
+             status = gm_register_memory(gm_ptl->gm_port, buffer_ptr, bytes_reg);
             recv_frag->registered_buf = buffer_ptr;
             A_PRINT("Receiver: register addr: %p, bytes: %d\n",buffer_ptr,bytes_reg);

--- a/src/mca/ptl/gm/src/ptl_gm.h
+++ b/src/mca/ptl/gm/src/ptl_gm.h
@ -1,3 +1,5 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+
 /*
 * $HEADER$
 */
@ -20,7 +22,6 @@
 #define MCA_PTL_GM_STATISTICS 0
 #define GM_SIZE 30
 #define THRESHOLD 16384 
-#define MAX_GM_PORTS 16
 #define MAX_RECV_TOKENS 256
 #define PTL_GM_ADMIN_SEND_TOKENS 0
 #define PTL_GM_ADMIN_RECV_TOKENS 0
@ -43,6 +44,10 @@ struct mca_ptl_gm_component_t {
    int         gm_free_list_num;        /**< initial size of free lists */
    int         gm_free_list_max;        /**< maximum size of free lists */
    int         gm_free_list_inc;        /**< number of elements to alloc when growing free lists */
+    int         gm_max_port_number;      /**< maximum number of ports by board */
+    int         gm_max_boards_number;    /**< maximum number of boards on the node */
+    int         gm_max_rdma_frag_size;   /**< maximum fragment size used to transfer data over RDMA */
+    char*       gm_port_name;            /**< the name used to get the port */
    struct mca_ptl_gm_proc_t* gm_local;
    ompi_list_t gm_procs;
    ompi_list_t gm_send_req;
@ -60,10 +65,10 @@ extern mca_ptl_gm_component_t mca_ptl_gm_component;
 */
 struct mca_ptl_gm_module_t {
    mca_ptl_base_module_t super;    /**< base PTL module interface */
-    struct gm_port *my_port;
-    unsigned int my_local_id;
-    unsigned int my_global_id;
-    unsigned int my_port_id;
+    struct gm_port *gm_port;
+    unsigned int local_id;
+    unsigned int global_id;
+    unsigned int port_id;
    unsigned int num_send_tokens;
    unsigned int num_recv_tokens;
    unsigned int max_send_tokens;
@ -74,6 +79,8 @@ struct mca_ptl_gm_module_t {
    ompi_list_t gm_send_frags_queue;
    ompi_list_t gm_pending_acks;
    ompi_list_t gm_recv_outstanding_queue;
+
+    ompi_thread_t thread;
 #if MCA_PTL_GM_STATISTICS
    size_t      ptl_bytes_sent;
    size_t      ptl_bytes_recv;
--- a/src/mca/ptl/gm/src/ptl_gm_component.c
+++ b/src/mca/ptl/gm/src/ptl_gm_component.c
@ -63,52 +63,34 @@ static bool mca_ptl_gm_component_initialized = false;
 */

 static inline char *
-mca_ptl_gm_param_register_string (const char *param_name,
-                                  const char *default_value)
+mca_ptl_gm_param_register_string( const char *param_name,
+                                  const char *default_value )
 {
-    char       *param_value;
-    int         id =
-        mca_base_param_register_string ("ptl", "gm", param_name, NULL,
-                                        default_value);
+    char *param_value;
+    int  id = mca_base_param_register_string( "ptl", "gm", param_name, NULL,
+					      default_value) ;
    mca_base_param_lookup_string (id, &param_value);
    return param_value;
 }

-
-
 static inline int
-mca_ptl_gm_param_register_int (const char *param_name, int default_value)
+mca_ptl_gm_param_register_int( const char *param_name, int default_value )
 {
-    int         id =
+    int id =
        mca_base_param_register_int ("ptl", "gm", param_name, NULL,
                                     default_value);
-    int         param_value = default_value;
+    int param_value = default_value;
    mca_base_param_lookup_int (id, &param_value);
    return param_value;
 }

-
-
-/*
- *
- */
-static int
-ompi_mca_ptl_gm_finalize (mca_ptl_gm_module_t * gm)
-{
-    /* add code */
-    return OMPI_SUCCESS;
-}
-
-
-
-
 /*
 *  Called by MCA framework to open the module, registers
 *  module parameters.
 */

 int
-mca_ptl_gm_component_open (void)
+mca_ptl_gm_component_open(void)
 {
    /* initialize state */
    mca_ptl_gm_component.gm_ptl_modules = NULL;
@ -122,11 +104,22 @@ mca_ptl_gm_component_open (void)
    /* register GM component parameters */
    mca_ptl_gm_module.super.ptl_first_frag_size =
        mca_ptl_gm_param_register_int ("first_frag_size", 
-		((PTL_GM_FIRST_FRAG_SIZE) - 64));
+                                       ((PTL_GM_FIRST_FRAG_SIZE) - 64));
    mca_ptl_gm_module.super.ptl_min_frag_size =
        mca_ptl_gm_param_register_int ("min_frag_size", 1<<16);
    mca_ptl_gm_module.super.ptl_max_frag_size =
        mca_ptl_gm_param_register_int ("max_frag_size", 256 * 1024);
+    /* Parameters setting the message limits. */
+    mca_ptl_gm_component.gm_port_name =
+        mca_ptl_gm_param_register_string( "port_name", "OMPI_GM" );
+    mca_ptl_gm_component.gm_max_port_number =
+        mca_ptl_gm_param_register_int ("max_ports_number", 16 );
+    mca_ptl_gm_component.gm_max_boards_number =
+        mca_ptl_gm_param_register_int ("max_boards_number", 4 );
+    mca_ptl_gm_component.gm_max_rdma_frag_size =
+        mca_ptl_gm_param_register_int ("max_rdma_frag_size", 512 * 1024);
+    mca_ptl_gm_component.gm_max_ptl_modules = 
+	mca_ptl_gm_param_register_int( "max_ptl_modules", 1 );
    
    mca_ptl_gm_component.gm_free_list_num =
        mca_ptl_gm_param_register_int ("free_list_num", 256);
@ -136,23 +129,11 @@ mca_ptl_gm_component_open (void)
    return OMPI_SUCCESS;
 }

-
 /*
 * component close 
 */
-
 int mca_ptl_gm_component_close (void)
 {
-#ifdef GOPAL_TODO
-    if (OMPI_SUCCESS != ompi_mca_ptl_gm_finalize(&mca_ptl_gm_component)) {
-       ompi_output(0,
-       "[%s:%d] error in finalizing gm state and PTL's.\n",
-       __FILE__, __LINE__);
-       return NULL;
-       }
-
-#endif
-
    if (NULL != mca_ptl_gm_component.gm_ptl_modules)
        free (mca_ptl_gm_component.gm_ptl_modules);

@ -168,30 +149,28 @@ int mca_ptl_gm_component_close (void)
 */

 static int
-mca_ptl_gm_create (int i)
+mca_ptl_gm_create( mca_ptl_gm_module_t** pptl )
 {
    mca_ptl_gm_module_t *ptl;

-    ptl = (mca_ptl_gm_module_t *)malloc (sizeof (mca_ptl_gm_module_t));
+    ptl = (mca_ptl_gm_module_t *)malloc( sizeof(mca_ptl_gm_module_t) );
    if (NULL == ptl) {
-        ompi_output (0,
-                     " ran out of resource to allocate ptl_instance \n");
+        ompi_output( 0, " ran out of resource to allocate ptl_instance \n" );
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
-
+    
+    /* copy the basic informations in the new PTL */
    memcpy (ptl, &mca_ptl_gm_module, sizeof (mca_ptl_gm_module));
-    mca_ptl_gm_component.gm_ptl_modules[i] = ptl;
-
+    ptl->thread.t_handle = (pthread_t)-1;
+    *pptl = ptl;
+    
    return OMPI_SUCCESS;
 }

-
-
 /*
 *  Register GM component addressing information. The MCA framework
 *  will make this available to all peers. 
 */
-
 static int
 mca_ptl_gm_module_store_data_toexchange (void)
 {
@ -209,9 +188,9 @@ mca_ptl_gm_module_store_data_toexchange (void)

    for (i = 0; i < mca_ptl_gm_component.gm_num_ptl_modules; i++) {
        mca_ptl_gm_module_t *ptl = mca_ptl_gm_component.gm_ptl_modules[i];
-        addrs[i].local_id = ptl->my_local_id;
-        addrs[i].global_id = ptl->my_global_id;
-        addrs[i].port_id = ptl->my_port_id;
+        addrs[i].local_id = ptl->local_id;
+        addrs[i].global_id = ptl->global_id;
+        addrs[i].port_id = ptl->port_id;
    }
    rc = mca_base_modex_send (&mca_ptl_gm_component.super.ptlm_version, addrs,
                              size);
@ -219,178 +198,223 @@ mca_ptl_gm_module_store_data_toexchange (void)
    return rc;
 }

-static int
-ompi_mca_ptl_gm_init (mca_ptl_gm_component_t * gm)
+static void*
+mca_ptl_gm_thread_progress( ompi_thread_t* thread )
 {
-    mca_ptl_gm_module_t *ptl;
-    unsigned int board_no, port_no;
-    gm_status_t status;
-    int         i;
-    int         maxptls = 1;    /* maxptls set to 1 */
+    gm_recv_event_t *event;
+    mca_ptl_gm_module_t* ptl = thread->t_arg;

-    mca_ptl_gm_component.gm_max_ptl_modules = maxptls;
-    mca_ptl_gm_component.gm_ptl_modules = malloc (maxptls * 
-                                                  sizeof (mca_ptl_gm_module_t *));
-    if (NULL == mca_ptl_gm_component.gm_ptl_modules)
-        return OMPI_ERR_OUT_OF_RESOURCE;
+    /* This thread enter in a cancel enabled state */
+    pthread_setcancelstate( PTHREAD_CANCEL_ENABLE, NULL );
+    pthread_setcanceltype( PTHREAD_CANCEL_ASYNCHRONOUS, NULL );

-    for (i = 0; i < maxptls; i++) {
-        mca_ptl_gm_create (i);
+    while(1) {
+	event = gm_blocking_receive(ptl->gm_port);
+	mca_ptl_gm_analyze_recv_event( ptl, event );
    }
-
-    /*Hack : we have set the gm_max_ptl_modules to 1 */
-    for (i = 0; i < mca_ptl_gm_component.gm_max_ptl_modules; i++) {
-        ptl = mca_ptl_gm_component.gm_ptl_modules[i];
-
-        /* open the first available gm port for this board  */
-        board_no = i;
-        for (port_no = 2; port_no < MAX_GM_PORTS; port_no++) 
-        { 
-	        GM_DBG(PTL_GM_DBG_COMM,"about to call open port\n");
-	        if (port_no == 3) continue;
-	        /* port 0,1,3 reserved  */
-	        status = gm_open (&(ptl->my_port), board_no, 
-                              port_no, "OMPI-GM", GM_API_VERSION_2_0);  
-
-	        if (GM_SUCCESS == status) {
-	       	    ptl->my_port_id = port_no;
-                mca_ptl_gm_component.gm_num_ptl_modules++;
-	       	    break;
-	        }
-       	}
-
-#if 1
-        /*  Get node local Id */
-        if (GM_SUCCESS != gm_get_node_id (ptl->my_port, &(ptl->my_local_id))) {
-            ompi_output (0, " failure to get local_id \n");
-            return 0;
-        }
-#endif
-
-        /* Convert local id to global id */
-        if (GM_SUCCESS !=
-            gm_node_id_to_global_id (ptl->my_port, ptl->my_local_id,
-                                     &(ptl->my_global_id))) {
-            ompi_output (0, " Error: Unable to get my GM global id \n");
-            return 0;
-        }
-    }
-
-    return OMPI_SUCCESS;
-
+    return PTHREAD_CANCELED;
 }

-
-
-
-static int
-ompi_mca_ptl_gm_init_sendrecv (mca_ptl_gm_component_t * gm)
+/* Scan all ports on the boards. As it's difficult to find the total number of boards
+ * we use a predefined maximum.
+ * Return the number of discovered boards where opening a port was a succesfull operation.
+ */
+static int32_t
+mca_ptl_gm_discover_boards( mca_ptl_gm_module_t** pptl,
+			    uint32_t max_ptls, uint32_t max_boards, uint32_t max_port )
 {
-    int i;
-    mca_ptl_gm_module_t *ptl;
+    uint32_t        board_no, port_no;
+    struct gm_port* gm_port;
+    uint32_t        index = 0;
+    uint32_t        local_id, global_id;
+
+    for( board_no = 0; board_no < max_boards; board_no++ ) {
+
+        /* open the first available gm port for this board  */
+        for( port_no = 2; port_no < max_port; port_no++ ) {
+            if (port_no == 3) continue;  /* port 0,1,3 reserved  */
+            if( GM_SUCCESS == gm_open( &gm_port, board_no, port_no,
+				       mca_ptl_gm_component.gm_port_name, GM_API_VERSION_2_0) )
+		break;
+        }
+        if( port_no == max_port ) continue;
+
+        /*  Get node local Id */
+        if( GM_SUCCESS != gm_get_node_id( gm_port, &local_id) ) {
+            ompi_output (0, " failure to get local_id \n");
+            continue;
+        }
+	
+        /* Convert local id to global id */
+        if (GM_SUCCESS != gm_node_id_to_global_id( gm_port, local_id, &global_id) ) {
+            ompi_output (0, " Error: Unable to get my GM global id \n");
+            continue;
+        }
+
+	/* Create the ptl. If fail return the number of already created */
+	if( OMPI_SUCCESS != mca_ptl_gm_create( &(pptl[index]) ) )
+	    return index;
+
+        pptl[index]->port_id   = port_no;
+        pptl[index]->gm_port   = gm_port;
+	pptl[index]->local_id  = local_id;
+	pptl[index]->global_id = global_id;
+
+	/* everything is OK let's mark it as usable and go to the next one */
+	if( (++index) >= max_ptls ) break;
+    }
+
+    return index;
+}
+
+static inline int
+mca_ptl_gm_init_sendrecv (mca_ptl_gm_module_t * ptl)
+{
+    uint32_t i;
    gm_status_t status;
    void *gm_send_reg_memory , *gm_recv_reg_memory;
    ompi_free_list_t *fslist, *free_rlist;
    mca_ptl_gm_send_frag_t *sfragment;
    mca_ptl_gm_recv_frag_t *free_rfragment;

-    for (i = 0; i < mca_ptl_gm_component.gm_max_ptl_modules; i++) {
-        ptl = mca_ptl_gm_component.gm_ptl_modules[i];
+    ptl->num_send_tokens = gm_num_send_tokens (ptl->gm_port);
+    ptl->num_send_tokens -= PTL_GM_ADMIN_SEND_TOKENS;
+    ptl->num_recv_tokens = gm_num_receive_tokens (ptl->gm_port);
+    ptl->num_recv_tokens -= PTL_GM_ADMIN_RECV_TOKENS;

-        ptl->num_send_tokens = gm_num_send_tokens (ptl->my_port);
-        ptl->num_send_tokens -= PTL_GM_ADMIN_SEND_TOKENS;
-        ptl->num_recv_tokens = gm_num_receive_tokens (ptl->my_port);
-        ptl->num_recv_tokens -= PTL_GM_ADMIN_RECV_TOKENS;
+    /****************SEND****************************/
+    /* construct a list of send fragments */
+    OBJ_CONSTRUCT (&(ptl->gm_send_frags), ompi_free_list_t);
+    OBJ_CONSTRUCT (&(ptl->gm_send_frags_queue), ompi_list_t);
+    fslist = &(ptl->gm_send_frags);

-       /****************SEND****************************/
-        /* construct a list of send fragments */
-        OBJ_CONSTRUCT (&(ptl->gm_send_frags), ompi_free_list_t);
-        OBJ_CONSTRUCT (&(ptl->gm_send_frags_queue), ompi_list_t);
-        fslist = &(ptl->gm_send_frags);
+    ompi_free_list_init (&(ptl->gm_send_frags),
+			 sizeof (mca_ptl_gm_send_frag_t),
+			 OBJ_CLASS (mca_ptl_gm_send_frag_t),
+			 ptl->num_send_tokens, ptl->num_send_tokens, 1, NULL); /* not using mpool */

-        ompi_free_list_init (&(ptl->gm_send_frags),
-                             sizeof (mca_ptl_gm_send_frag_t),
-                             OBJ_CLASS (mca_ptl_gm_send_frag_t),
-                             ptl->num_send_tokens,ptl->num_send_tokens, 1, NULL); /* not using mpool */
+    /* allocate the elements */
+    sfragment = (mca_ptl_gm_send_frag_t *)
+	malloc (sizeof(mca_ptl_gm_send_frag_t) * (ptl->num_send_tokens));

-        /* allocate the elements */
-        sfragment = (mca_ptl_gm_send_frag_t *)
-            malloc (sizeof(mca_ptl_gm_send_frag_t) *
-                    (ptl->num_send_tokens));
-
-        /* allocate the registered memory */
-        gm_send_reg_memory = gm_dma_malloc ( ptl->my_port,
-                              (GM_SEND_BUF_SIZE * ptl->num_send_tokens) );
-        if( NULL == gm_send_reg_memory ) {
-            ompi_output( 0, "unable to allocate registered memory\n" );
-            return OMPI_ERR_OUT_OF_RESOURCE;
-        }
-        for (i = 0; i < ptl->num_send_tokens; i++) {
-            ompi_list_item_t *item;
-            sfragment->send_buf = gm_send_reg_memory;
-            item = (ompi_list_item_t *) sfragment;
-            OMPI_FREE_LIST_RETURN( fslist, item );
-            gm_send_reg_memory = ((char *)gm_send_reg_memory) + GM_SEND_BUF_SIZE;
-            sfragment++;
-
-        }
-        A_PRINT("recv_tokens = %d send_tokens = %d, allocted free lis =
-                %d\n",ptl->num_recv_tokens,ptl->num_send_tokens,fslist->fl_num_allocated);
+    /* allocate the registered memory */
+    gm_send_reg_memory = gm_dma_malloc ( ptl->gm_port,
+					 (GM_SEND_BUF_SIZE * ptl->num_send_tokens) );
+    if( NULL == gm_send_reg_memory ) {
+	ompi_output( 0, "unable to allocate registered memory\n" );
+	return OMPI_ERR_OUT_OF_RESOURCE;
+    }
+    for (i = 0; i < ptl->num_send_tokens; i++) {
+	ompi_list_item_t *item;
+	sfragment->send_buf = gm_send_reg_memory;
+	item = (ompi_list_item_t *) sfragment;
+	OMPI_FREE_LIST_RETURN( fslist, item );
+	gm_send_reg_memory = ((char *)gm_send_reg_memory) + GM_SEND_BUF_SIZE;
+	sfragment++;
+    }
+    A_PRINT( ("recv_tokens = %d send_tokens = %d, allocted free lis = %d\n",
+	      ptl->num_recv_tokens,ptl->num_send_tokens,fslist->fl_num_allocated) );


-        /*****************RECEIVE*****************************/
-        /*allow remote memory access */
-        status = gm_allow_remote_memory_access (ptl->my_port);
-        if (GM_SUCCESS != status) {
-            ompi_output (0, "unable to allow remote memory access\n");
+    /*****************RECEIVE*****************************/
+    /*allow remote memory access */
+    status = gm_allow_remote_memory_access (ptl->gm_port);
+    if (GM_SUCCESS != status) {
+	ompi_output (0, "unable to allow remote memory access\n");
+    }

-        }
+    OBJ_CONSTRUCT (&(ptl->gm_recv_outstanding_queue), ompi_list_t);

-        OBJ_CONSTRUCT (&(ptl->gm_recv_outstanding_queue), ompi_list_t);
-
-        /* construct the list of recv fragments free */
-        OBJ_CONSTRUCT (&(ptl->gm_recv_frags_free), ompi_free_list_t);
-        free_rlist = &(ptl->gm_recv_frags_free);
+    /* construct the list of recv fragments free */
+    OBJ_CONSTRUCT (&(ptl->gm_recv_frags_free), ompi_free_list_t);
+    free_rlist = &(ptl->gm_recv_frags_free);
  
-        ompi_free_list_init (&(ptl->gm_recv_frags_free),
-                             sizeof (mca_ptl_gm_recv_frag_t),
-                             OBJ_CLASS (mca_ptl_gm_recv_frag_t),
-                             ptl->num_recv_tokens,ptl->num_recv_tokens, 1, NULL); 
+    ompi_free_list_init (&(ptl->gm_recv_frags_free),
+			 sizeof (mca_ptl_gm_recv_frag_t),
+			 OBJ_CLASS (mca_ptl_gm_recv_frag_t),
+			 ptl->num_recv_tokens,ptl->num_recv_tokens, 1, NULL); 
 
-        /*allocate the elements */
-        free_rfragment = (mca_ptl_gm_recv_frag_t *)
-                    malloc(sizeof(mca_ptl_gm_recv_frag_t) * NUM_RECV_FRAGS);
+    /*allocate the elements */
+    free_rfragment = (mca_ptl_gm_recv_frag_t *)
+	malloc(sizeof(mca_ptl_gm_recv_frag_t) * ptl->num_recv_tokens);

+    /*allocate the registered memory */
+    gm_recv_reg_memory =
+	gm_dma_malloc (ptl->gm_port, (GM_RECV_BUF_SIZE * ptl->num_recv_tokens ) );

-        for (i = 0; i < NUM_RECV_FRAGS; i++) {
-            ompi_list_item_t *item;
-            item = (ompi_list_item_t *) free_rfragment;
-            OMPI_FREE_LIST_RETURN( free_rlist, item );
-            free_rfragment++;
-        }
+    if( NULL == gm_recv_reg_memory ) {
+	ompi_output( 0, "unable to allocate registered memory for receive\n" );
+	return OMPI_ERR_OUT_OF_RESOURCE;
+    }

-        /*allocate the registered memory */
-        gm_recv_reg_memory =
-            gm_dma_malloc (ptl->my_port, (GM_RECV_BUF_SIZE * ptl->num_recv_tokens ) );
+    for( i = 0; i < ptl->num_recv_tokens; i++ ) {
+	ompi_list_item_t *item = (ompi_list_item_t *) free_rfragment;
+	OMPI_FREE_LIST_RETURN( free_rlist, item );
+	free_rfragment++;

-        if( NULL == gm_recv_reg_memory ) 
-        {
-            ompi_output( 0, "unable to allocate registered memory for receive\n" );
-            return OMPI_ERR_OUT_OF_RESOURCE;
-        }
-
-        for (i = 0; i < ptl->num_recv_tokens ; i++)
-        {
-            gm_provide_receive_buffer( ptl->my_port, gm_recv_reg_memory,
-                                       GM_SIZE, GM_LOW_PRIORITY );
-            gm_recv_reg_memory = ((char *)gm_recv_reg_memory) + GM_RECV_BUF_SIZE;
-        }
+	gm_provide_receive_buffer( ptl->gm_port, gm_recv_reg_memory,
+				   GM_SIZE, GM_LOW_PRIORITY );
+	gm_recv_reg_memory = ((char *)gm_recv_reg_memory) + GM_RECV_BUF_SIZE;
    }

    return OMPI_SUCCESS;
 }
 
+static int
+mca_ptl_gm_init( mca_ptl_gm_component_t * gm )
+{
+    uint32_t index;
+    mca_ptl_gm_module_t* ptl;

+    /* let's try to find if GM is available */
+    if( GM_SUCCESS != gm_init() )
+	return OMPI_ERR_OUT_OF_RESOURCE;
+
+    /* First discover all available boards. For each board we will create a unique PTL */
+    mca_ptl_gm_component.gm_ptl_modules = calloc( mca_ptl_gm_component.gm_max_ptl_modules,
+						  sizeof (mca_ptl_gm_module_t *));
+    if (NULL == mca_ptl_gm_component.gm_ptl_modules)
+        return OMPI_ERR_OUT_OF_RESOURCE;
+
+    mca_ptl_gm_component.gm_num_ptl_modules = 
+	mca_ptl_gm_discover_boards( mca_ptl_gm_component.gm_ptl_modules,
+				    mca_ptl_gm_component.gm_max_ptl_modules,
+				    mca_ptl_gm_component.gm_max_boards_number,
+				    mca_ptl_gm_component.gm_max_port_number );
+
+    /* In the case when we are in a multi-threaded environment each PTL will have it's
+     * own thread. At this point all structures are correctly initialized, each thread
+     * will grab one and use it.
+     */
+    if( ompi_using_threads() ) {
+	uint32_t save_counter;
+
+	for( index = 0; index < mca_ptl_gm_component.gm_num_ptl_modules; index++ ) {
+	    ptl = mca_ptl_gm_component.gm_ptl_modules[index];
+	    /* Now prepost some received and allocate some sends. After this step the PTL
+	     * is fully initialized.
+	     */
+	    if( OMPI_SUCCESS != mca_ptl_gm_init_sendrecv( ptl ) )
+		break;
+
+	    ptl->thread.t_run = (ompi_thread_fn_t)mca_ptl_gm_thread_progress;
+	    ptl->thread.t_arg = (void*)ptl;
+	    if( OMPI_SUCCESS != ompi_thread_start( &(ptl->thread) ) )
+		break;
+	}
+	save_counter = index;
+	/* If we are unable to start all the required threads we update the total
+	 * number of threads and call finalize for the others PTLs.
+	 */
+	for( ; index < mca_ptl_gm_component.gm_num_ptl_modules; index++ ) {
+	    mca_ptl_base_module_t* ptl = (mca_ptl_base_module_t*)mca_ptl_gm_component.gm_ptl_modules[index];
+	    ptl->ptl_finalize( ptl );
+	}
+	mca_ptl_gm_component.gm_num_ptl_modules = save_counter;
+    }
+
+    return (mca_ptl_gm_component.gm_num_ptl_modules > 0 ? OMPI_SUCCESS : OMPI_ERR_OUT_OF_RESOURCE);
+}

 /*
 * Initialize the GM component, 
@ -405,20 +429,12 @@ mca_ptl_gm_component_init (int *num_ptl_modules,
    mca_ptl_base_module_t **ptls;

    *num_ptl_modules = 0;
-    *allow_multi_user_threads = false;
-    *have_hidden_threads = false;
+    *allow_multi_user_threads = true;
+    *have_hidden_threads = true;

-    if (OMPI_SUCCESS != ompi_mca_ptl_gm_init (&mca_ptl_gm_component)) {
-        ompi_output (0,
-                     "[%s:%d] error in initializing gm state and PTL's.\n",
-                     __FILE__, __LINE__);
-        return NULL;
-    }
-
-    if (OMPI_SUCCESS != ompi_mca_ptl_gm_init_sendrecv (&mca_ptl_gm_component)) {
-        ompi_output (0,
-                     "[%s:%d] error in initializing buffer resources .\n",
-                     __FILE__, __LINE__);
+    if (OMPI_SUCCESS != mca_ptl_gm_init (&mca_ptl_gm_component)) {
+        ompi_output( 0, "[%s:%d] error in initializing gm state and PTL's.\n",
+                     __FILE__, __LINE__ );
        return NULL;
    }

@ -426,7 +442,6 @@ mca_ptl_gm_component_init (int *num_ptl_modules,
    if (OMPI_SUCCESS != mca_ptl_gm_module_store_data_toexchange ())
        return 0;

-
    /* return array of PTLs */
    ptls = (mca_ptl_base_module_t**) malloc (
                mca_ptl_gm_component.gm_num_ptl_modules * sizeof(mca_ptl_base_module_t *));
--- a/src/mca/ptl/gm/src/ptl_gm_priv.c
+++ b/src/mca/ptl/gm/src/ptl_gm_priv.c
@ -34,16 +34,15 @@ int mca_ptl_gm_peer_put(mca_ptl_gm_peer_t *ptl_peer,
                        void * target_buffer,
                        int bytes) 
 {
-
-   gm_put( ptl_peer->peer_ptl->my_port, fragment->registered_buf,
-	   (gm_remote_ptr_t)target_buffer, bytes, GM_LOW_PRIORITY,
-	   ptl_peer->local_id, ptl_peer->port_number, 
-	   put_callback, (void *)fragment );
+    gm_put( ptl_peer->peer_ptl->gm_port, fragment->registered_buf,
+	    (gm_remote_ptr_t)target_buffer, bytes, GM_LOW_PRIORITY,
+	    ptl_peer->local_id, ptl_peer->port_number, 
+	    put_callback, (void *)fragment );


    fragment->send_frag.frag_base.frag_owner = &ptl_peer->peer_ptl->super;
    fragment->send_frag.frag_base.frag_peer =
-                             (struct mca_ptl_base_peer_t*)ptl_peer;
+	(struct mca_ptl_base_peer_t*)ptl_peer;
    fragment->send_frag.frag_base.frag_addr =(void *)target_buffer;
    fragment->send_frag.frag_base.frag_size = bytes; 
    return OMPI_SUCCESS;
@ -125,7 +124,7 @@ int mca_ptl_gm_peer_send(mca_ptl_gm_peer_t *ptl_peer,
 	     ((mca_ptl_base_header_t *)header)->hdr_ack.hdr_src_ptr);
 
    /* initiate the gm send */
-    gm_send_with_callback( ptl_peer->peer_ptl->my_port, fragment->send_buf, 
+    gm_send_with_callback( ptl_peer->peer_ptl->gm_port, fragment->send_buf, 
                           GM_SIZE, size_out, GM_LOW_PRIORITY, ptl_peer->local_id,
                           ptl_peer->port_number, send_callback, (void *)fragment );

@ -181,7 +180,7 @@ void put_callback(struct gm_port *port,void * context, gm_status_t status)
 #endif

        /* deregister the user memory */
-       status = gm_deregister_memory(ptl->my_port, (char *)(putfrag->registered_buf), bytes2);
+       status = gm_deregister_memory(ptl->gm_port, (char *)(putfrag->registered_buf), bytes2);

       if(GM_SUCCESS != status) {
    	   ompi_output(0," unpinning memory failed\n");
@ -446,7 +445,7 @@ void ptl_gm_ctrl_frag(struct mca_ptl_gm_module_t *ptl,
      /* deregister the memory */
      bytes = header->hdr_ack.hdr_dst_size;
      reg_buf =(char *) header->hdr_ack.hdr_dst_addr.pval; 
-      status = gm_deregister_memory(ptl->my_port, reg_buf, bytes);
+      status = gm_deregister_memory(ptl->gm_port, reg_buf, bytes);

      if(GM_SUCCESS != status) 
 	{
@ -555,7 +554,6 @@ void mca_ptl_gm_outstanding_recv(mca_ptl_gm_module_t *ptl)
 
    size = ompi_list_get_size (&ptl->gm_recv_outstanding_queue);

-
    if (size > 0)
    {  
        frag = (mca_ptl_gm_recv_frag_t *)
@ -581,55 +579,59 @@ void mca_ptl_gm_outstanding_recv(mca_ptl_gm_module_t *ptl)

 } 

+int mca_ptl_gm_analyze_recv_event(mca_ptl_gm_module_t* ptl, gm_recv_event_t* event )
+{
+    void * mesg;
+    mca_ptl_gm_recv_frag_t * frag;
+
+    switch (gm_ntohc(event->recv.type)) {
+    case GM_RECV_EVENT:
+    case GM_HIGH_RECV_EVENT:
+    case GM_PEER_RECV_EVENT:
+    case GM_HIGH_PEER_RECV_EVENT:
+	mesg = gm_ntohp(event->recv.buffer);
+	frag = ptl_gm_handle_recv( ptl, event );
+	GM_DBG(PTL_GM_DBG_COMM,"FINISHED HANDLING INCOMING EVENT\n");
+	
+	if( (frag != NULL) && !(frag->matched) ) {
+	    /* allocate temporary buffer: temporary until the fragment will be finally matched */
+	    char* buffer = malloc( GM_SEND_BUF_SIZE );
+	    if (NULL == buffer) {
+		ompi_output(0, "[%s:%d] error in allocating memory \n", __FILE__, __LINE__);
+	    }
+	    /* copy the data from the registered buffer to the newly allocated one */
+	    memcpy( buffer, mesg, gm_ntohl(event->recv.length) );
+	    /* associate the buffer with the unexpected fragment */
+	    frag->frag_recv.frag_base.frag_addr = (void *)buffer;
+	    /* mark the fragment as having pending buffers */
+	    frag->have_allocated_buffer = true;
+	    
+	}
+	gm_provide_receive_buffer( ptl->gm_port, gm_ntohp(event->recv.buffer),
+				   GM_SIZE, GM_LOW_PRIORITY );
+	break;
+    case GM_NO_RECV_EVENT:
+	break;
+	
+    default:
+	gm_unknown(ptl->gm_port, event);
+	
+    }
+
+    return 0;
+}
+
 int mca_ptl_gm_incoming_recv (mca_ptl_gm_component_t * gm_comp)
 {
-    int i;
+    uint32_t i;
    gm_recv_event_t *event;
-    void * mesg;
    mca_ptl_gm_module_t *ptl;
-    mca_ptl_gm_recv_frag_t * frag;

    for( i = 0; i< gm_comp->gm_num_ptl_modules; i++) {
        ptl = gm_comp->gm_ptl_modules[i];
-        event = gm_receive(ptl->my_port);
-
-        switch (gm_ntohc(event->recv.type)) {
-        case GM_RECV_EVENT:
-        case GM_HIGH_RECV_EVENT:
-        case GM_PEER_RECV_EVENT:
-        case GM_HIGH_PEER_RECV_EVENT:
-            mesg = gm_ntohp(event->recv.buffer);
-            frag = ptl_gm_handle_recv( ptl, event );
-            GM_DBG(PTL_GM_DBG_COMM,"FINISHED HANDLING INCOMING EVENT\n");
-
-            if( (frag != NULL) && !(frag->matched) ) {
-                /* allocate temporary buffer: temporary until the fragment will be finally matched */
-                char* buffer = malloc( GM_SEND_BUF_SIZE );
-                if (NULL == buffer)
-                {
-                  ompi_output(0, "[%s:%d] error in allocating memory \n",
-                                                        __FILE__, __LINE__);
-
-                }
-                /* copy the data from the registered buffer to the newly allocated one */
-                memcpy( buffer, mesg, gm_ntohl(event->recv.length) );
-                /* associate the buffer with the unexpected fragment */
-                frag->frag_recv.frag_base.frag_addr = (void *)buffer;
-                /* mark the fragment as having pending buffers */
-                frag->have_allocated_buffer = true;
-
-            }
-            gm_provide_receive_buffer( ptl->my_port, gm_ntohp(event->recv.buffer),
-                                       GM_SIZE, GM_LOW_PRIORITY );
-            break;
-        case GM_NO_RECV_EVENT:
-            break;
-
-        default:
-            gm_unknown(ptl->my_port, event);
-
-        }
+        event = gm_receive(ptl->gm_port);

+	mca_ptl_gm_analyze_recv_event( ptl, event );
    }  
    return 0;
 }
--- a/src/mca/ptl/gm/src/ptl_gm_priv.h
+++ b/src/mca/ptl/gm/src/ptl_gm_priv.h
@ -49,6 +49,8 @@ mca_ptl_gm_recv_frag_t* ptl_gm_data_frag( struct mca_ptl_gm_module_t *ptl,
 mca_ptl_gm_recv_frag_t* ptl_gm_handle_recv( mca_ptl_gm_module_t *ptl,
                                            gm_recv_event_t* event );

+int mca_ptl_gm_analyze_recv_event(mca_ptl_gm_module_t* ptl, gm_recv_event_t* event );
+
 int mca_ptl_gm_incoming_recv (mca_ptl_gm_component_t * gm_comp);