/* Source viewer metadata: 664 lines, 14 KiB, C */
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <slang.h>
|
|
|
|
/* Declare this translation unit as the S-Lang module named "csv".
 * NOTE(review): SLANG_MODULE is not defined in this file; presumably it
 * comes from <slang.h> and declares the module's init/deinit entry points
 * (init_csv_module_ns / deinit_csv_module below) -- confirm against slang.h.
 */
SLANG_MODULE(csv);
|
|
|
|
/* Class id used for CSV_Type objects when creating and popping MMTs.
 * It stays 0 until register_csv_type() stores the id obtained from
 * SLclass_get_class_id(); a non-zero value makes register_csv_type() a no-op.
 */
static int CSV_Type_Id = 0;
|
|
|
|
typedef struct _CSV_Type CSV_Type;
|
|
struct _CSV_Type
|
|
{
|
|
char delimchar;
|
|
char quotechar;
|
|
SLang_Name_Type *read_callback;
|
|
SLang_Any_Type *callback_data;
|
|
#define CSV_SKIP_BLANK_ROWS 0x01
|
|
#define CSV_STOP_ON_BLANK_ROWS 0x02
|
|
#define BLANK_ROW_BEHAVIOR (CSV_SKIP_BLANK_ROWS|CSV_STOP_ON_BLANK_ROWS)
|
|
#define CSV_QUOTE_SOME 0x04
|
|
#define CSV_QUOTE_ALL 0x08
|
|
int flags;
|
|
};
|
|
|
|
/* Substitute the default delimiter (',') and quote ('"') characters for
 * any that were left as 0.  Always returns 0.
 */
static int check_special_chars (CSV_Type *csv)
{
   if ('\0' == csv->delimchar)
     {
        csv->delimchar = ',';
     }

   if ('\0' == csv->quotechar)
     {
        csv->quotechar = '"';
     }

   return 0;
}
|
|
|
|
/* Call the decoder's read callback with callback_data as its only argument
 * and fetch the result from the stack.
 *
 * Returns:
 *    1  a string was popped; *sptr holds it (an slstring the caller must free)
 *    0  the callback left NULL on the stack; *sptr is NULL
 *   -1  an S-Lang call failed; *sptr is NULL
 */
static int execute_read_callback (CSV_Type *csv, char **sptr)
{
   char *str;

   *sptr = NULL;

   if (-1 == SLang_start_arg_list ())
     return -1;

   if (-1 == SLang_push_anytype (csv->callback_data))
     return -1;

   if (-1 == SLang_end_arg_list ())
     return -1;

   if (-1 == SLexecute_function (csv->read_callback))
     return -1;

   if (SLANG_NULL_TYPE == SLang_peek_at_stack ())
     {
        (void) SLang_pop_null ();
        return 0;
     }

   if (-1 == SLang_pop_slstring (&str))
     return -1;

   *sptr = str;
   return 1;
}
|
|
|
|
typedef struct
|
|
{
|
|
char **values;
|
|
SLindex_Type num_allocated;
|
|
SLindex_Type num;
|
|
}
|
|
Values_Array_Type;
|
|
|
|
/* Push the collected values onto the S-Lang stack as a String_Type array.
 *
 * With no values collected, NULL is pushed instead unless allow_empty_array
 * is non-zero, in which case an empty array is pushed.  Otherwise the
 * storage is trimmed to exactly av->num slots and handed to
 * SLang_create_array.  Once the array exists, av is reset so that a later
 * free_values_array() will not touch the storage.
 *
 * Returns the status of the push, or -1 if trimming or array creation fails
 * (av then still holds the values).
 */
static int push_values_array (Values_Array_Type *av, int allow_empty_array)
{
   SLang_Array_Type *at;

   if (av->num != 0)
     {
        char **trimmed;

        trimmed = (char **) SLrealloc ((char *)av->values, av->num*sizeof(char *));
        if (trimmed == NULL)
          return -1;

        av->values = trimmed;
     }
   else
     {
        if (!allow_empty_array)
          return SLang_push_null ();

        SLfree ((char *) av->values);
        av->values = NULL;
     }

   av->num_allocated = av->num;

   at = SLang_create_array (SLANG_STRING_TYPE, 0, av->values, &av->num, 1);
   if (NULL == at)
     return -1;

   /* The storage is now referenced by the array; forget it here. */
   av->values = NULL;
   av->num = 0;
   av->num_allocated = 0;

   return SLang_push_array (at, 1);
}
|
|
|
|
/* Put a Values_Array_Type into its empty state: no storage, no values.
 * Always returns 0.
 */
static int init_values_array_type (Values_Array_Type *av)
{
   av->values = NULL;
   av->num_allocated = 0;
   av->num = 0;

   return 0;
}
|
|
|
|
/* Free every stored slstring and then the pointer array itself.
 * Does nothing when no storage is attached.  The fields of av are left
 * unchanged.
 */
static void free_values_array (Values_Array_Type *av)
{
   char **p, **pmax;

   if (av->values == NULL)
     return;

   p = av->values;
   pmax = p + av->num;

   while (p < pmax)
     {
        SLang_free_slstring (*p);
        p++;
     }

   SLfree ((char *) av->values);
}
|
|
|
|
/* Append a copy of value (as an slstring) to va, enlarging the slot array
 * by 256 entries whenever it is full.
 *
 * Returns 0 on success, -1 if the array cannot be grown or the string
 * cannot be created; va->num is only advanced on success.
 */
static int store_value (Values_Array_Type *va, char *value)
{
   char *str;

   if (va->num == va->num_allocated)
     {
        SLindex_Type new_size = va->num_allocated + 256;
        char **grown;

        grown = (char **) SLrealloc ((char *)va->values, new_size*sizeof(char *));
        if (NULL == grown)
          return -1;

        va->values = grown;
        va->num_allocated = new_size;
     }

   str = SLang_create_slstring (value);
   va->values[va->num] = str;
   if (NULL == str)
     return -1;

   va->num++;
   return 0;
}
|
|
|
|
/* Fetch the next input character into ch.  For use inside decode_csv_row
 * only: relies on its locals csv, line, line_ofs, do_read and status, and
 * on its return_error label.  Invoke without a trailing semicolon.
 *
 *  - While do_read is 0, characters come from line[line_ofs++].  Any
 *    character other than NUL or '\r' is delivered as-is.
 *  - "\r\n" is delivered as a single '\n'.
 *  - On NUL, on a '\r' not followed by '\n', or when do_read is set, the
 *    current line is freed and the read callback is asked for another one.
 *    A callback failure jumps to return_error; a NULL result (status 0)
 *    delivers ch = 0.
 */
#define NEXT_CHAR(ch) \
   while (1) \
     { \
        if (do_read == 0) \
          { \
             ch = line[line_ofs++]; \
             if ((ch != 0) && (ch != '\r')) \
               break; \
             if ((ch == '\r') && (line[line_ofs] == '\n')) \
               { \
                  line_ofs++; \
                  ch = '\n'; \
                  break; \
               } \
          } \
        SLang_free_slstring (line); \
        line = NULL; \
        status = execute_read_callback (csv, &line); \
        do_read = 0; \
        if (status == -1) \
          goto return_error; \
        line_ofs = 0; \
        if (status == 0) \
          { \
             ch = 0; \
             break; \
          } \
     }
|
|
|
|
/* Decode one CSV row, pulling input through csv->read_callback, and push
 * the result onto the S-Lang stack.
 *
 * flags: only the BLANK_ROW_BEHAVIOR bits are examined here.
 *   CSV_SKIP_BLANK_ROWS     blank lines are skipped
 *   CSV_STOP_ON_BLANK_ROWS  a blank line ends decoding with an empty array
 *   neither (or both)       a blank line yields a row with one empty field
 *
 * Pushes an array of strings for a row, or NULL when the input is exhausted
 * before any field was seen.  A quoted field may span several input lines.
 *
 * Returns the status of the push, or -1 on error (no read callback, failed
 * allocation, callback failure, or malformed quoting; the last reports
 * SL_Data_Error).
 *
 * Fix relative to the previous version: a final row made up solely of an
 * empty quoted field ("") with no trailing newline used to be dropped and
 * reported as end of input.  It is now returned as a one-element row, the
 * same as when a newline follows.
 */
static int decode_csv_row (CSV_Type *csv, int flags)
{
   char *line;
   size_t line_ofs;
   char *value;
   size_t value_size, value_ofs;
   char delimchar, quotechar;
   int return_status;
   Values_Array_Type av;
   int do_read, in_quote;
   int blank_line_seen;
   int is_quoted;

   if (NULL == csv->read_callback)
     {
        SLang_verror (SL_InvalidParm_Error, "CSV decoder object has no read callback function");
        return -1;
     }

   if (-1 == init_values_array_type (&av))
     return -1;

   delimchar = csv->delimchar;
   quotechar = csv->quotechar;
   value_ofs = line_ofs = 0;
   value_size = 0;
   value = NULL;
   line = NULL;
   do_read = 1;                        /* forces NEXT_CHAR to fetch the first line */

   in_quote = 0;
   return_status = -1;
   blank_line_seen = 0;
   is_quoted = 0;                      /* set once a field of this row opened with a quote */
   while (1)
     {
        int status;                    /* used by NEXT_CHAR */
        char ch;

        /* Guarantee room for one more byte; each iteration stores at most
         * one character or the terminating NUL.
         */
        if (value_ofs == value_size)
          {
             char *new_value;

             if (value_size < 64)
               value_size += 32;
             else if (value_size < 8192)
               value_size *= 2;
             else value_size += 8192;

             new_value = (char *)SLrealloc (value, value_size);
             if (new_value == NULL)
               goto return_error;
             value = new_value;
          }

        NEXT_CHAR(ch)

        if ((ch == quotechar) && quotechar)
          {
             if (in_quote)
               {
                  /* Either a doubled quote (literal quote character) or
                   * the end of the quoted field.
                   */
                  NEXT_CHAR(ch)
                  if (ch == quotechar)
                    {
                       value[value_ofs++] = ch;
                       continue;
                    }

                  if ((ch != delimchar) && (ch != 0) && (ch != '\n'))
                    {
                       SLang_verror (SL_Data_Error, "Expecting a delimiter after an end-quote character in field #%ld",
                                     (long)av.num+1);
                       goto return_error;
                    }
                  in_quote = 0;
                  /* drop */
               }
             else if (value_ofs != 0)
               {
                  SLang_verror (SL_Data_Error, "Misplaced quote character inside csv field #%ld",
                                (long)av.num+1);
                  goto return_error;
               }
             else
               {
                  in_quote = 1;
                  is_quoted = 1;
                  continue;
               }
          }

        if (ch == delimchar)
          {
             if (in_quote)
               {
                  value[value_ofs++] = ch;
                  continue;
               }
             value[value_ofs] = 0;
             if (-1 == store_value (&av, value))
               goto return_error;
             value_ofs = 0;
             continue;
          }
        if ((ch == 0) || (ch == '\n'))
          {
             if (in_quote)
               {
                  if (ch == '\n')
                    {
                       /* Newline inside quotes: keep it and read on. */
                       value[value_ofs++] = ch;
                       do_read = 1;
                       continue;
                    }
                  SLang_verror (SL_Data_Error, "No closing quote seen parsing CSV data field #%ld",
                                (long)av.num+1);
                  goto return_error;
               }

             /* ch == 0 means the input is exhausted.  A pending field is
              * stored if anything was collected for this row, including
              * an explicitly quoted empty field (is_quoted).
              */
             if ((ch == '\n') || (av.num != 0) || (value_ofs > 0) || is_quoted)
               {
                  if ((is_quoted == 0)
                      && (ch == '\n') && (av.num == 0) && (value_ofs == 0))
                    {
                       /* blank line */
                       int blank_line_behavior = (flags & BLANK_ROW_BEHAVIOR);
                       if (blank_line_behavior == CSV_SKIP_BLANK_ROWS)
                         {
                            do_read = 1;
                            continue;
                         }
                       if (blank_line_behavior == CSV_STOP_ON_BLANK_ROWS)
                         {
                            blank_line_seen = 1;
                            break;
                         }
                    }
                  value[value_ofs] = 0;
                  if (-1 == store_value (&av, value))
                    goto return_error;
               }
             break; /* done */
          }

        value[value_ofs++] = ch;
     }

   /* Get here if at end of line or file */
   return_status = push_values_array (&av, blank_line_seen);
   /* drop */

return_error:
   SLfree (value);
   free_values_array(&av);
   if (line != NULL)
     SLang_free_slstring (line);
   return return_status;
}
|
|
|
|
/* Release a CSV_Type together with the callback references it holds.
 * A NULL argument is ignored.
 */
static void free_csv_type (CSV_Type *csv)
{
   if (NULL != csv)
     {
        if (NULL != csv->callback_data)
          SLang_free_anytype (csv->callback_data);

        if (NULL != csv->read_callback)
          SLang_free_function (csv->read_callback);

        SLfree ((char *) csv);
     }
}
|
|
|
|
/* Pop a CSV_Type MMT from the S-Lang stack.
 *
 * On success *mmtp receives the MMT (to be released by the caller with
 * SLang_free_mmt) and the wrapped CSV_Type is returned.  On failure *mmtp
 * is set to NULL and NULL is returned.
 */
static CSV_Type *pop_csv_type (SLang_MMT_Type **mmtp)
{
   SLang_MMT_Type *popped = SLang_pop_mmt (CSV_Type_Id);

   *mmtp = popped;
   if (popped == NULL)
     return NULL;

   return (CSV_Type *) SLang_object_from_mmt (popped);
}
|
|
|
|
/* Usage: obj = cvs_decoder_new (&read_callback, callback_data, delim, quote, flags) */
|
|
static void new_csv_decoder_intrin (void)
|
|
{
|
|
CSV_Type *csv;
|
|
SLang_MMT_Type *mmt;
|
|
|
|
if (NULL == (csv = (CSV_Type *)SLmalloc(sizeof(CSV_Type))))
|
|
return;
|
|
memset ((char *)csv, 0, sizeof(CSV_Type));
|
|
|
|
if ((-1 == SLang_pop_int (&csv->flags))
|
|
||(-1 == SLang_pop_char (&csv->quotechar))
|
|
|| (-1 == SLang_pop_char (&csv->delimchar))
|
|
|| (-1 == check_special_chars (csv))
|
|
|| (-1 == SLang_pop_anytype (&csv->callback_data))
|
|
|| (NULL == (csv->read_callback = SLang_pop_function ()))
|
|
|| (NULL == (mmt = SLang_create_mmt (CSV_Type_Id, (VOID_STAR)csv))))
|
|
{
|
|
free_csv_type (csv);
|
|
return;
|
|
}
|
|
|
|
if (-1 == SLang_push_mmt (mmt))
|
|
SLang_free_mmt (mmt);
|
|
}
|
|
|
|
static void decode_csv_row_intrin (void)
|
|
{
|
|
CSV_Type *csv;
|
|
SLang_MMT_Type *mmt;
|
|
int flags = 0;
|
|
int has_flags = 0;
|
|
|
|
if (SLang_Num_Function_Args == 2)
|
|
{
|
|
if (-1 == SLang_pop_int (&flags))
|
|
return;
|
|
|
|
has_flags = 1;
|
|
}
|
|
if (NULL == (csv = pop_csv_type (&mmt)))
|
|
return;
|
|
|
|
if (has_flags == 0)
|
|
flags = csv->flags;
|
|
|
|
(void) decode_csv_row (csv, flags);
|
|
SLang_free_mmt (mmt);
|
|
}
|
|
|
|
/* returns a malloced string */
|
|
static char *csv_encode (CSV_Type *csv,
|
|
char **fields, SLuindex_Type nfields,
|
|
int flags)
|
|
{
|
|
char *encoded_str, *s;
|
|
size_t size;
|
|
SLuindex_Type i;
|
|
char delimchar, quotechar;
|
|
int quote_some, quote_all;
|
|
char *fieldflags;
|
|
|
|
delimchar = csv->delimchar;
|
|
quotechar = csv->quotechar;
|
|
quote_some = flags & (CSV_QUOTE_SOME|CSV_QUOTE_ALL);
|
|
quote_all = flags & CSV_QUOTE_ALL;
|
|
|
|
size = 0;
|
|
if (nfields > 1)
|
|
size += nfields-1; /* for delimiters */
|
|
size += 3; /* for CRLF\0 */
|
|
|
|
fieldflags = (char *)SLmalloc(nfields+1);
|
|
if (fieldflags == NULL)
|
|
return NULL;
|
|
|
|
for (i = 0; i < nfields; i++)
|
|
{
|
|
char ch, *f, *field = fields[i];
|
|
int needs_quote = 0;
|
|
|
|
fieldflags[i] = 0;
|
|
if ((field == NULL) || (*field == 0))
|
|
{
|
|
if (quote_some)
|
|
{
|
|
fieldflags[i] = 1;
|
|
size += 2;
|
|
}
|
|
continue;
|
|
}
|
|
f = field;
|
|
while ((ch = *f++) != 0)
|
|
{
|
|
size++;
|
|
if (ch == quotechar)
|
|
{
|
|
needs_quote=1;
|
|
size++;
|
|
continue;
|
|
}
|
|
if (ch == delimchar)
|
|
{
|
|
needs_quote = 1;
|
|
continue;
|
|
}
|
|
|
|
if ((unsigned char)ch > ' ')
|
|
continue;
|
|
|
|
if (ch == '\n')
|
|
{
|
|
#if 0
|
|
/* Do not insert a \r-- excel cannot deal with \r\n in a field.
|
|
* It requires \r\n only at the end of the CSV line
|
|
*/
|
|
size++; /* for \r */
|
|
#endif
|
|
needs_quote = 1;
|
|
continue;
|
|
}
|
|
if (quote_some)
|
|
needs_quote = 1;
|
|
}
|
|
|
|
if (needs_quote || quote_all)
|
|
{
|
|
fieldflags[i] = 1;
|
|
size += 2;
|
|
}
|
|
}
|
|
|
|
if (NULL == (encoded_str = (char *)SLmalloc (size)))
|
|
{
|
|
SLfree (fieldflags);
|
|
return NULL;
|
|
}
|
|
s = encoded_str;
|
|
|
|
i = 0;
|
|
while (i < nfields)
|
|
{
|
|
char ch, *f, *field;
|
|
int needs_quote;
|
|
|
|
needs_quote = fieldflags[i];
|
|
field = fields[i];
|
|
i++;
|
|
|
|
if ((i > 1) && (i <= nfields))
|
|
*s++ = delimchar;
|
|
|
|
if (needs_quote) *s++ = quotechar;
|
|
|
|
if ((field == NULL) || (*field == 0))
|
|
{
|
|
if (needs_quote)
|
|
*s++ = quotechar;
|
|
continue;
|
|
}
|
|
|
|
f = field;
|
|
while ((ch = *f++) != 0)
|
|
{
|
|
if (ch == quotechar)
|
|
{
|
|
*s++ = ch;
|
|
*s++ = ch;
|
|
continue;
|
|
}
|
|
|
|
if (ch == '\n')
|
|
{
|
|
/* *s++ = '\r'; --- See above comment about excel and \r\n in a field*/
|
|
*s++ = ch;
|
|
continue;
|
|
}
|
|
|
|
*s++ = ch;
|
|
}
|
|
if (needs_quote)
|
|
*s++ = quotechar;
|
|
}
|
|
|
|
*s++ = '\r';
|
|
*s++ = '\n';
|
|
*s = 0;
|
|
|
|
SLfree (fieldflags);
|
|
return encoded_str;
|
|
}
|
|
|
|
static void encode_csv_row_intrin (void)
|
|
{
|
|
SLang_Array_Type *at;
|
|
CSV_Type *csv;
|
|
SLang_MMT_Type *mmt;
|
|
int flags;
|
|
int has_flags;
|
|
char *str;
|
|
|
|
if (SLang_Num_Function_Args == 3)
|
|
{
|
|
if (-1 == SLang_pop_int (&flags))
|
|
return;
|
|
has_flags = 1;
|
|
}
|
|
else has_flags = 0;
|
|
|
|
if (-1 == SLang_pop_array_of_type (&at, SLANG_STRING_TYPE))
|
|
return;
|
|
|
|
if (NULL == (csv = pop_csv_type (&mmt)))
|
|
{
|
|
SLang_free_array (at);
|
|
return;
|
|
}
|
|
|
|
if (0 == has_flags)
|
|
flags = csv->flags;
|
|
|
|
str = csv_encode (csv, (char **)at->data, at->num_elements, flags);
|
|
SLang_free_mmt (mmt);
|
|
SLang_free_array (at);
|
|
(void) SLang_push_malloced_string (str);
|
|
}
|
|
|
|
static void new_csv_encoder_intrin (void)
|
|
{
|
|
CSV_Type *csv;
|
|
SLang_MMT_Type *mmt;
|
|
|
|
if (NULL == (csv = (CSV_Type *)SLmalloc(sizeof(CSV_Type))))
|
|
return;
|
|
memset ((char *)csv, 0, sizeof(CSV_Type));
|
|
|
|
if ((-1 == SLang_pop_int (&csv->flags))
|
|
||(-1 == SLang_pop_char (&csv->quotechar))
|
|
|| (-1 == SLang_pop_char (&csv->delimchar))
|
|
|| (-1 == check_special_chars (csv))
|
|
|| (NULL == (mmt = SLang_create_mmt (CSV_Type_Id, (VOID_STAR)csv))))
|
|
{
|
|
free_csv_type (csv);
|
|
return;
|
|
}
|
|
|
|
if (-1 == SLang_push_mmt (mmt))
|
|
SLang_free_mmt (mmt);
|
|
}
|
|
|
|
/* Placeholder type id that register_csv_type() asks
 * SLclass_patch_intrin_fun_table1() to replace with CSV_Type_Id in
 * Module_Intrinsics.  None of the entries currently in that table
 * reference it.
 */
#define DUMMY_CSV_TYPE ((SLtype)-1)
|
|
static SLang_Intrin_Fun_Type Module_Intrinsics [] =
|
|
{
|
|
MAKE_INTRINSIC_0("_csv_decoder_new", new_csv_decoder_intrin, SLANG_VOID_TYPE),
|
|
MAKE_INTRINSIC_0("_csv_decode_row", decode_csv_row_intrin, SLANG_VOID_TYPE),
|
|
MAKE_INTRINSIC_0("_csv_encoder_new", new_csv_encoder_intrin, SLANG_VOID_TYPE),
|
|
MAKE_INTRINSIC_0("_csv_encode_row", encode_csv_row_intrin, SLANG_VOID_TYPE),
|
|
SLANG_END_INTRIN_FUN_TABLE
|
|
};
|
|
|
|
static SLang_IConstant_Type Module_Constants [] =
|
|
{
|
|
MAKE_ICONSTANT("CSV_SKIP_BLANK_ROWS", CSV_SKIP_BLANK_ROWS),
|
|
MAKE_ICONSTANT("CSV_STOP_BLANK_ROWS", CSV_STOP_ON_BLANK_ROWS),
|
|
MAKE_ICONSTANT("CSV_QUOTE_SOME", CSV_QUOTE_SOME),
|
|
MAKE_ICONSTANT("CSV_QUOTE_ALL", CSV_QUOTE_ALL),
|
|
SLANG_END_ICONST_TABLE
|
|
};
|
|
|
|
/* Destroy callback installed on the CSV_Type class by register_csv_type():
 * frees the CSV_Type passed as f.  The type argument is unused.
 */
static void destroy_csv (SLtype type, VOID_STAR f)
{
   CSV_Type *csv = (CSV_Type *) f;

   (void) type;
   free_csv_type (csv);
}
|
|
|
|
static int register_csv_type (void)
|
|
{
|
|
SLang_Class_Type *cl;
|
|
|
|
if (CSV_Type_Id != 0)
|
|
return 0;
|
|
|
|
if (NULL == (cl = SLclass_allocate_class ("CSV_Type")))
|
|
return -1;
|
|
|
|
if (-1 == SLclass_set_destroy_function (cl, destroy_csv))
|
|
return -1;
|
|
|
|
/* By registering as SLANG_VOID_TYPE, slang will dynamically allocate a
|
|
* type.
|
|
*/
|
|
if (-1 == SLclass_register_class (cl, SLANG_VOID_TYPE, sizeof (CSV_Type), SLANG_CLASS_TYPE_MMT))
|
|
return -1;
|
|
|
|
CSV_Type_Id = SLclass_get_class_id (cl);
|
|
if (-1 == SLclass_patch_intrin_fun_table1 (Module_Intrinsics, DUMMY_CSV_TYPE, CSV_Type_Id))
|
|
return -1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int init_csv_module_ns (char *ns_name)
|
|
{
|
|
SLang_NameSpace_Type *ns = SLns_create_namespace (ns_name);
|
|
if (ns == NULL)
|
|
return -1;
|
|
|
|
if (-1 == register_csv_type ())
|
|
return -1;
|
|
|
|
if ((-1 == SLns_add_intrin_fun_table (ns, Module_Intrinsics, NULL))
|
|
|| (-1 == SLns_add_iconstant_table (ns, Module_Constants, NULL)))
|
|
return -1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Optional module teardown hook.  This module has nothing to release, so
 * the body is intentionally empty.
 */
void deinit_csv_module (void)
{
   /* nothing to do */
}
|