1
1
Josh Hursey e10f476c87 Bring over the jjh-filem branch which contains a non-blocking FileM interface
and implementation. This has shown drastic performance benefit when
transferring many files at roughly the same time.

I tested this for many different filem operations and everything was working
fine. Let me know if you have any problems with this functionality.

Some Notes:
 - opal-checkpoint now has a 'quiet' flag to keep it from being too verbose.

 - FileM RSH component is fully non-blocking.

 - FileM RSH component has incoming connection throttling, since by default
   ssh only allows 10 concurrent scp connections to any single host. This
   default can be adjusted via an MCA parameter.
    {{{-mca filem_rsh_max_incomming 10}}}

 - There is an MCA parameter for max outgoing connections, but it is currently
   not implemented. If someone needs it then it should not be hard to implement.
    {{{-mca filem_rsh_max_outgoing 10}}}

 - Changed the FileM request structure so that it is a bit more explicit and
   flexible.

 - Moved the 'preload-binary' and 'preload-files' functionality into odls/base
   allowing for code reuse in the 'process' and 'default' ODLS components.

 - Fixed a bug in the process name resolution which broke the 'preload-*'
   functionality due to GPR table structure changes.

 - The FileM RSH component might be able to see even more speedup from using a
   thread pool to operate on the work_pool structures, but that is for future
   work.

 - Added an 'opal-show-help' file to ODLS Base

This commit was SVN r16252.
2007-09-27 13:13:29 +00:00

133 строки
4.6 KiB
C

/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef ORTE_FILEM_BASE_H
#define ORTE_FILEM_BASE_H
#include "orte_config.h"
#include "orte/mca/rml/rml.h"
#include "orte/dss/dss.h"
#include "orte/mca/filem/filem.h"
/*
* Global functions for MCA overall FILEM
*/
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
* FileM request object maintenance functions
*
* Constructor/destructor pairs, one pair for each of three object types:
* - orte_filem_base_process_set_t (process set)
* - orte_filem_base_file_set_t (file set)
* - orte_filem_base_request_t (request)
*
* NOTE(review): presumably these are the construct/destruct callbacks wired
* into the corresponding class definitions; confirm against the
* implementation file.
*
* @param obj Object to be constructed or destructed
*/
ORTE_DECLSPEC void orte_filem_base_process_set_construct(orte_filem_base_process_set_t *obj);
ORTE_DECLSPEC void orte_filem_base_process_set_destruct( orte_filem_base_process_set_t *obj);
ORTE_DECLSPEC void orte_filem_base_file_set_construct(orte_filem_base_file_set_t *obj);
ORTE_DECLSPEC void orte_filem_base_file_set_destruct( orte_filem_base_file_set_t *obj);
ORTE_DECLSPEC void orte_filem_base_construct(orte_filem_base_request_t *obj);
ORTE_DECLSPEC void orte_filem_base_destruct( orte_filem_base_request_t *obj);
/**
* Initialize the FILEM MCA framework.
*
* Takes no arguments.
*
* @retval ORTE_SUCCESS Upon success
* @retval ORTE_ERROR Upon failure
*
* This function is invoked during orte_init().
*/
ORTE_DECLSPEC int orte_filem_base_open(void);
/**
* Select an available FILEM component.
*
* Takes no arguments.
*
* @retval ORTE_SUCCESS Upon success
* @retval ORTE_NOT_FOUND If no component can be selected
* @retval ORTE_ERROR Upon other failure
*/
ORTE_DECLSPEC int orte_filem_base_select(void);
/**
* Finalize the FILEM MCA framework.
*
* Takes no arguments.
*
* @retval ORTE_SUCCESS Upon success
* @retval ORTE_ERROR Upon failure
*
* This function is invoked during orte_finalize().
*/
ORTE_DECLSPEC int orte_filem_base_close(void);
/**
* Globals
*
* These are declarations only; the definitions live outside this header.
*/
/* NOTE(review): presumably the output stream id used for FILEM diagnostics; confirm. */
ORTE_DECLSPEC extern int orte_filem_base_output;
/* List of FILEM components (judging by the name, those found to be available). */
ORTE_DECLSPEC extern opal_list_t orte_filem_base_components_available;
/* NOTE(review): presumably set by orte_filem_base_select(); confirm. */
ORTE_DECLSPEC extern orte_filem_base_component_t orte_filem_base_selected_component;
/* NOTE(review): presumably the active module's function table; confirm against filem.h. */
ORTE_DECLSPEC extern orte_filem_base_module_t orte_filem;
/**
* 'None' component functions
*
* These are to be used when no component is selected.
* Per the original note, they just return success, and empty strings as
* necessary.
*
* Unlike the other declarations in this header, these carry no
* ORTE_DECLSPEC qualifier.
*
* NOTE(review): orte_filem_base_module_init() and
* orte_filem_base_module_finalize() do not carry the "none_" infix used by
* every other function in this group; confirm whether that is intentional.
*
* @param request      FileM request to act on (put/get/rm/wait variants)
* @param request_list List of requests (wait_all only)
*
* The "_nb" suffix follows the non-blocking naming used by the FileM
* interface.
*/
int orte_filem_base_none_open(void);
int orte_filem_base_none_close(void);
int orte_filem_base_module_init(void);
int orte_filem_base_module_finalize(void);
int orte_filem_base_none_put(orte_filem_base_request_t *request);
int orte_filem_base_none_put_nb(orte_filem_base_request_t *request);
int orte_filem_base_none_get(orte_filem_base_request_t *request);
int orte_filem_base_none_get_nb(orte_filem_base_request_t *request);
int orte_filem_base_none_rm( orte_filem_base_request_t *request);
int orte_filem_base_none_rm_nb( orte_filem_base_request_t *request);
int orte_filem_base_none_wait( orte_filem_base_request_t *request);
int orte_filem_base_none_wait_all( opal_list_t *request_list);
/**
* Some utility functions
*
* Set up and tear down a FILEM listener.
*
* @param rml_cbfunc RML buffer callback supplied by the caller.
*                   NOTE(review): presumably invoked when a FILEM message
*                   arrives; confirm against the implementation.
*
* Both functions return an int status code.
* NOTE(review): presumably ORTE_SUCCESS on success, as elsewhere in this
* header; confirm.
*/
ORTE_DECLSPEC int orte_filem_base_listener_init(orte_rml_buffer_callback_fn_t rml_cbfunc);
ORTE_DECLSPEC int orte_filem_base_listener_cancel(void);
/**
* Get Node Name for an ORTE process
*
* @param proc         Process whose node name is requested
* @param machine_name Receives the node name.
*                     NOTE(review): ownership of the returned string is not
*                     visible from this header; confirm who frees it.
*/
ORTE_DECLSPEC int orte_filem_base_get_proc_node_name(orte_process_name_t *proc, char **machine_name);
/**
* Query a peer about a remote path.
*
* @param remote_ref Path reference passed by pointer.
*                   NOTE(review): presumably in/out; confirm.
* @param peer       Process to query
* @param flag       Integer passed by pointer.
*                   NOTE(review): meaning not visible here; confirm against
*                   the implementation.
*/
ORTE_DECLSPEC int orte_filem_base_query_remote_path(char **remote_ref, orte_process_name_t *peer, int *flag);
/**
* Callback taking a status, peer, buffer, tag and opaque callback data.
*
* NOTE(review): the parameter list looks like an RML buffer callback (the
* type accepted by orte_filem_base_listener_init()); confirm it matches
* orte_rml_buffer_callback_fn_t.
*/
ORTE_DECLSPEC void orte_filem_base_query_callback(int status,
orte_process_name_t* peer,
orte_buffer_t *buffer,
orte_rml_tag_t tag,
void* cbdata);
/**
* Setup request structure
*
* @param request   Request to prepare
* @param move_type Integer selecting the kind of movement.
*                  NOTE(review): valid values are not visible in this
*                  header; presumably constants from filem.h. Confirm.
*
* Returns an int status code.
* NOTE(review): presumably ORTE_SUCCESS on success; confirm.
*/
ORTE_DECLSPEC int orte_filem_base_prepare_request(orte_filem_base_request_t *request, int move_type);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* ORTE_FILEM_BASE_H */