1
1
openmpi/orte/mca/filem/filem.h
Josh Hursey e10f476c87 Bring over the jjh-filem branch which contains a non-blocking FileM interface
and implementation. This has shown a drastic performance benefit when
transferring many files at roughly the same time.

I tested this for many different filem operations and everything was working
fine. Let me know if you have any problems with this functionality.

Some Notes:
 - opal-checkpoint now has a 'quiet' flag to keep it from being too verbose.

 - FileM RSH component is fully non-blocking.

 - FileM RSH component has incoming connection throttling since by default
   ssh only allows 10 concurrent scp connections to any single host. This
   default can be adjusted via an MCA parameter.
    {{{-mca filem_rsh_max_incomming 10}}}

 - There is an MCA parameter for max outgoing connections, but it is currently
   not implemented. If someone needs it then it should not be hard to implement.
    {{{-mca filem_rsh_max_outgoing 10}}}

 - Changed the FileM request structure so that it is a bit more explicit and
   flexible.

 - Moved the 'preload-binary' and 'preload-files' functionality into odls/base
   allowing for code reuse in the 'process' and 'default' ODLS components.

 - Fixed a bug in the process name resolution which broke the 'preload-*'
   functionality due to GPR table structure changes.

 - The FileM RSH component might be able to see even more speedup from using a
   thread pool to operate on the work_pool structures, but that is for future
   work.

 - Added a 'opal-show-help' file to ODLS Base

This commit was SVN r16252.
2007-09-27 13:13:29 +00:00

379 строки
11 KiB
C

/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Remote File Management (FileM) Interface
*
*/
#ifndef MCA_FILEM_H
#define MCA_FILEM_H
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "orte/mca/ns/ns.h"
#include "opal/class/opal_object.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
 * File-type flags.
 *
 * Describe what kind of entry a file reference names: a regular file,
 * a directory, or a type that is not (yet) known.
 */
#define ORTE_FILEM_TYPE_FILE    0
#define ORTE_FILEM_TYPE_DIR     1
#define ORTE_FILEM_TYPE_UNKNOWN 2
/**
 * Movement-type flags.
 *
 * Identify the kind of file movement: put, get, remove, or a movement
 * whose type is not (yet) known.
 */
#define ORTE_FILEM_MOVE_TYPE_PUT     0
#define ORTE_FILEM_MOVE_TYPE_GET     1
#define ORTE_FILEM_MOVE_TYPE_RM      2
#define ORTE_FILEM_MOVE_TYPE_UNKNOWN 3
/**
 * Define a Process Set
 *
 * Names the two process endpoints of a FileM operation.
 *
 * Source: A single source of the operation.
 * Sink: Destination of the operation.
 */
struct orte_filem_base_process_set_1_0_0_t {
/** This is an object, so must have a super.
 *  It is a list item, so a process set can be stored on an opal_list_t. */
opal_list_item_t super;
/** Source Process */
orte_process_name_t source;
/** Sink Process */
orte_process_name_t sink;
};
/* Versioned type name */
typedef struct orte_filem_base_process_set_1_0_0_t orte_filem_base_process_set_1_0_0_t;
/* Unversioned alias for the same structure */
typedef struct orte_filem_base_process_set_1_0_0_t orte_filem_base_process_set_t;
/* Class declaration for the object system (definition lives outside this header) */
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_filem_base_process_set_t);
/**
 * Define a File Pair
 *
 * Local: Local file reference
 * Remote: Remote file reference
 *
 * Note: If multiple process sinks are used it is assumed that the
 * file reference is the same for each of the sinks. If this is not
 * true then more than one filem request needs to be created.
 */
struct orte_filem_base_file_set_1_0_0_t {
/** This is an object, so must have a super.
 *  It is a list item, so a file set can be stored on an opal_list_t. */
opal_list_item_t super;
/* Local file reference */
char * local_target;
/* Remote file reference */
char * remote_target;
/* Type of file to move
 * NOTE(review): presumably one of the ORTE_FILEM_TYPE_* values -- confirm. */
int target_flag;
};
/* Versioned type name */
typedef struct orte_filem_base_file_set_1_0_0_t orte_filem_base_file_set_1_0_0_t;
/* Unversioned alias for the same structure */
typedef struct orte_filem_base_file_set_1_0_0_t orte_filem_base_file_set_t;
/* Class declaration for the object system (definition lives outside this header) */
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_filem_base_file_set_t);
/**
 * Definition of a file movement request
 * This will allow:
 * - The movement of one or more files
 * - to/from one or more processes
 * in a single call of the API function. Allowing the implementation
 * to optimize the sending/receiving of data.
 *
 * Used as the argument of the put/get/rm module functions (blocking and
 * non-blocking) and of wait; wait_all takes a list of these requests.
 */
struct orte_filem_base_request_1_0_0_t {
/** This is an object, so must have a super.
 *  It is a list item, so requests can themselves be stored on an opal_list_t. */
opal_list_item_t super;
/*
 * A list of process sets
 */
opal_list_t process_sets;
/*
 * A list of file pairings
 */
opal_list_t file_sets;
/*
 * Internal use:
 * Number of movements
 */
int num_mv;
/*
 * Internal use:
 * Boolean to indicate if transfer is complete
 * NOTE(review): this is a pointer, presumably an array with one entry
 * per movement (num_mv) -- confirm against the component implementation.
 */
bool *is_done;
/*
 * Internal use:
 * Boolean to indicate if transfer is active
 * NOTE(review): pointer, presumably one entry per movement -- confirm.
 */
bool *is_active;
/*
 * Internal use:
 * Exit status of the copy command
 * NOTE(review): pointer, presumably one entry per movement -- confirm.
 */
int32_t *exit_status;
/*
 * Internal use:
 * Movement type
 * NOTE(review): presumably one of the ORTE_FILEM_MOVE_TYPE_* values -- confirm.
 */
int movement_type;
};
/* Versioned type name */
typedef struct orte_filem_base_request_1_0_0_t orte_filem_base_request_1_0_0_t;
/* Unversioned alias for the same structure */
typedef struct orte_filem_base_request_1_0_0_t orte_filem_base_request_t;
/* Class declaration for the object system (definition lives outside this header) */
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_filem_base_request_t);
/**
 * Component query function for FILEM components.
 *
 * Supplies a priority that is used to rank this component against the
 * other available FILEM components.
 *
 * @param priority Location through which the component's priority is
 *                 reported.
 *
 * @return Pointer to a FILEM module structure
 *         (struct orte_filem_base_module_1_0_0_t).
 */
typedef struct orte_filem_base_module_1_0_0_t *(*orte_filem_base_component_query_1_0_0_fn_t)(int *priority);
/**
 * Module initialization hook.
 *
 * Takes no arguments.
 *
 * @return ORTE_SUCCESS
 */
typedef int (*orte_filem_base_module_init_fn_t)(void);
/**
 * Module finalization hook.
 *
 * Takes no arguments.
 *
 * @return ORTE_SUCCESS
 */
typedef int (*orte_filem_base_module_finalize_fn_t)(void);
/**
 * Put a file or directory on the remote machine.
 *
 * Note: When the remote file/directory is given as a relative path, the
 * filem component negotiates the correct absolute path for it on the
 * remote machine.
 *
 * @param request FileM request describing the files/directories to send,
 *                the remote files/directories to use, and the processes
 *                to see the change.
 *
 * @return ORTE_SUCCESS on successful file transfer
 * @return ORTE_ERROR on failed file transfer
 */
typedef int (*orte_filem_base_put_fn_t)(orte_filem_base_request_t *request);
/**
 * Put a file or directory on the remote machine (asynchronous variant).
 *
 * Note: When the remote file/directory is given as a relative path, the
 * filem component negotiates the correct absolute path for it on the
 * remote machine.
 *
 * @param request FileM request describing the files/directories to send,
 *                the remote files/directories to use, and the processes
 *                to see the change.
 *
 * @return ORTE_SUCCESS on successful file transfer
 * @return ORTE_ERROR on failed file transfer
 */
typedef int (*orte_filem_base_put_nb_fn_t)(orte_filem_base_request_t *request);
/**
 * Get a file from the remote machine.
 *
 * Note: When the remote file/directory is given as a relative path, the
 * filem component negotiates the correct absolute path for it on the
 * remote machine.
 *
 * @param request FileM request describing the files/directories to
 *                receive, the remote files/directories to use, and the
 *                processes to see the change.
 *
 * @return ORTE_SUCCESS on successful file transfer
 * @return ORTE_ERROR on failed file transfer
 */
typedef int (*orte_filem_base_get_fn_t)(orte_filem_base_request_t *request);
/**
 * Get a file from the remote machine (asynchronous variant).
 *
 * Note: When the remote file/directory is given as a relative path, the
 * filem component negotiates the correct absolute path for it on the
 * remote machine.
 *
 * @param request FileM request describing the files/directories to
 *                receive, the remote files/directories to use, and the
 *                processes to see the change.
 *
 * @return ORTE_SUCCESS on successful file transfer
 * @return ORTE_ERROR on failed file transfer
 */
typedef int (*orte_filem_base_get_nb_fn_t)(orte_filem_base_request_t *request);
/**
 * Remove a file from the remote machine.
 *
 * Note: When the remote file/directory is given as a relative path, the
 * filem component negotiates the correct absolute path for it on the
 * remote machine.
 *
 * @param request FileM request describing the remote files/directories
 *                to remove and the processes to see the change.
 *
 * @return ORTE_SUCCESS on success
 * @return ORTE_ERROR on failure
 */
typedef int (*orte_filem_base_rm_fn_t)(orte_filem_base_request_t *request);
/**
 * Remove a file from the remote machine (asynchronous variant).
 *
 * Note: When the remote file/directory is given as a relative path, the
 * filem component negotiates the correct absolute path for it on the
 * remote machine.
 *
 * @param request FileM request describing the remote files/directories
 *                to remove and the processes to see the change.
 *
 * @return ORTE_SUCCESS on success
 * @return ORTE_ERROR on failure
 */
typedef int (*orte_filem_base_rm_nb_fn_t)(orte_filem_base_request_t *request);
/**
 * Wait for a single file movement request to finish.
 *
 * The request must already have been passed through one of the
 * non-blocking functions; calling wait (or wait_all) on a request that
 * has not results in ORTE_ERROR.
 *
 * @param request FileM request describing the remote files/directories.
 *
 * @return ORTE_SUCCESS on success
 * @return ORTE_ERROR on failure
 */
typedef int (*orte_filem_base_wait_fn_t)(orte_filem_base_request_t *request);
/**
 * Wait for multiple file movement requests to finish.
 *
 * Every request must already have been passed through one of the
 * non-blocking functions; calling wait (or wait_all) on a request that
 * has not results in ORTE_ERROR.
 *
 * @param request_list opal_list_t of FileM requests describing the
 *                     remote files/directories.
 *
 * @return ORTE_SUCCESS on success
 * @return ORTE_ERROR on failure
 */
typedef int (*orte_filem_base_wait_all_fn_t)(opal_list_t *request_list);
/**
 * Structure for FILEM v1.0.0 components.
 *
 * Holds the MCA base component information, the query function used
 * during component selection, and the component's verbosity, output
 * handle and default priority settings.
 */
struct orte_filem_base_component_1_0_0_t {
/** MCA base component */
mca_base_component_t filem_version;
/** MCA base data */
mca_base_component_data_1_0_0_t filem_data;
/** Component Query for Selection Function */
orte_filem_base_component_query_1_0_0_fn_t filem_query;
/** Verbosity Level */
int verbose;
/** Output Handle for opal_output */
int output_handle;
/** Default Priority */
int priority;
};
/* Versioned type name */
typedef struct orte_filem_base_component_1_0_0_t orte_filem_base_component_1_0_0_t;
/* Unversioned alias for the same structure */
typedef struct orte_filem_base_component_1_0_0_t orte_filem_base_component_t;
/**
 * Structure for FILEM v1.0.0 modules
 *
 * Function table of a FILEM module: lifecycle hooks, the blocking and
 * non-blocking (`_nb`) put/get/rm operations, and the wait functions.
 */
struct orte_filem_base_module_1_0_0_t {
/** Initialization Function */
orte_filem_base_module_init_fn_t filem_init;
/** Finalization Function */
orte_filem_base_module_finalize_fn_t filem_finalize;
/** Put a file on the remote machine (blocking, then non-blocking) */
orte_filem_base_put_fn_t put;
orte_filem_base_put_nb_fn_t put_nb;
/** Get a file from the remote machine (blocking, then non-blocking) */
orte_filem_base_get_fn_t get;
orte_filem_base_get_nb_fn_t get_nb;
/** Remove a file on the remote machine (blocking, then non-blocking) */
orte_filem_base_rm_fn_t rm;
orte_filem_base_rm_nb_fn_t rm_nb;
/** Wait functions for requests started with the non-blocking versions */
orte_filem_base_wait_fn_t wait;
orte_filem_base_wait_all_fn_t wait_all;
};
/* Versioned type name */
typedef struct orte_filem_base_module_1_0_0_t orte_filem_base_module_1_0_0_t;
/* Unversioned alias for the same structure */
typedef struct orte_filem_base_module_1_0_0_t orte_filem_base_module_t;
/* Declaration of the global FILEM module instance; it is defined elsewhere. */
ORTE_DECLSPEC extern orte_filem_base_module_t orte_filem;
/**
 * Macro for use in components that are of type FILEM v1.0.0
 *
 * Expands to a comma-separated initializer fragment: the MCA v1.0.0
 * base version fields, followed by the framework name "filem" and its
 * version numbers 1, 0, 0.
 * NOTE(review): presumably meant to start the initializer of a
 * component's mca_base_component_t (filem_version) -- confirm.
 */
#define ORTE_FILEM_BASE_VERSION_1_0_0 \
/* FILEM v1.0 is chained to MCA v1.0 */ \
MCA_BASE_VERSION_1_0_0, \
/* FILEM v1.0 */ \
"filem", 1, 0, 0
#if defined(c_plusplus) || defined(__cplusplus)
} /* extern "C" */
#endif
#endif /* MCA_FILEM_H */