1
1

Added -f option to import an exported directory structure

This is the first working version. There are a few TODOs left.
Этот коммит содержится в:
Yorhel 2012-09-05 13:52:12 +02:00
родитель ef4abec6cf
Коммит 10dca5503a
5 изменённых файлов: 603 добавлений и 15 удалений

Просмотреть файл

@ -6,6 +6,7 @@ ncdu_SOURCES=\
src/dirlist.c\
src/dir_common.c\
src/dir_export.c\
src/dir_import.c\
src/dir_mem.c\
src/dir_scan.c\
src/exclude.c\

Просмотреть файл

@ -101,10 +101,15 @@ void dir_mem_init(struct dir *);
int dir_export_init(const char *fn);
/* Function set by input code. Returns dir_output.final(). */
int (*dir_process)();
/* Scanning a live directory */
extern int dir_scan_smfs;
void dir_scan_init(const char *path);
int dir_scan_process();
/* Importing a file */
int dir_import_init(const char *fn);
/* The currently configured output functions. */

576
src/dir_import.c Обычный файл
Просмотреть файл

@ -0,0 +1,576 @@
/* ncdu - NCurses Disk Usage
Copyright (c) 2007-2012 Yoran Heling
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/* This JSON parser has the following limitations:
* - No support for character encodings incompatible with ASCII (e.g.
* UTF-16/32)
* - Doesn't validate UTF-8 correctness (in fact, besides the ASCII part this
* parser doesn't know anything about encoding).
* - Doesn't validate that there are no duplicate keys in JSON objects.
* - Isn't very strict with validating non-integer numbers.
* - Does not check nesting level, easily allows stack overflow. (TODO: FIX!)
*/
#include "global.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <limits.h>
/* Max. length of any JSON string we're interested in. A string may of course
 * be larger, we're not going to read more than MAX_VAL in memory. If a string
 * we're interested in (e.g. a file name) is longer than this, reading the
 * import will result in an error. */
#define MAX_VAL (32*1024)
/* Minimum number of bytes we request from fread() */
#define MIN_READ_SIZE 1024
/* Read buffer size. Must be at least 2*MIN_READ_SIZE, everything larger
* improves performance. */
#define READ_BUF_SIZE (32*1024)
/* Parser state for one import. Everything lives in a single struct so that
 * it can be allocated and released in one go; the file-global pointer `ctx`
 * is what every parser function in this file operates on. */
struct ctx {
/* Input stream the JSON data is read from. */
FILE *stream;
/* Current line number; reported in error messages. */
int line;
/* Byte offset within the current line; reset to 0 on every newline and
 * reported in error messages. */
int byte;
/* Set to 1 once the stream has reported end-of-file. */
int eof;
/* Number of item information objects parsed so far. */
int items;
char *buf; /* points into readbuf, always zero-terminated. */
char *lastfill; /* points into readbuf, location of the zero terminator. */
/* Scratch space for object keys and for string values we are interested in. */
char val[MAX_VAL];
/* Raw data read from the stream. */
char readbuf[READ_BUF_SIZE];
} *ctx;
/* Fills readbuf with data from the stream. After a successful call *buf has
 * at least n bytes available, unless the stream reached EOF. n is expected to
 * be small compared to READ_BUF_SIZE (callers in this file pass at most a
 * handful of bytes).
 *
 * If the stream already hit EOF this is a no-op that returns 0. A zero byte in
 * the file data is considered an error, because the buffer is handled as a
 * zero-terminated string everywhere else.
 *
 * Returns 0 on success, non-zero on error (the error has then been recorded
 * with dir_seterr()). */
static int fill(int n) {
  int r;

  if(ctx->eof)
    return 0;

  r = READ_BUF_SIZE-(ctx->lastfill - ctx->readbuf); /* number of bytes left in the buffer */
  if(n < r)
    n = r-1;
  /* Move the unread data to the start of the buffer when there is too little
   * room left behind lastfill to make a read worthwhile, or when the request
   * simply does not fit there (n >= r). The latter check prevents fread()
   * from writing past the end of readbuf. */
  if(n < MIN_READ_SIZE || n >= r) {
    r = ctx->lastfill - ctx->buf; /* number of unread bytes left in the buffer */
    /* Source and destination are part of the same array, so use memmove(). */
    memmove(ctx->readbuf, ctx->buf, r);
    ctx->lastfill = ctx->readbuf + r;
    ctx->buf = ctx->readbuf;
    n = READ_BUF_SIZE-r-1; /* keep one byte for the zero terminator */
  }

  do {
    r = fread(ctx->lastfill, 1, n, ctx->stream);
    if(r != n) {
      if(feof(ctx->stream))
        ctx->eof = 1;
      else if(ferror(ctx->stream)) {
        dir_seterr("Read error: %s", strerror(errno));
        return 1;
      }
    }

    /* Terminate what we've just read; a shorter strlen() than the number of
     * bytes read means the data itself contains a zero byte. */
    ctx->lastfill[r] = 0;
    if(strlen(ctx->lastfill) != (size_t)r) {
      dir_seterr("Zero-byte found in JSON stream");
      return 1;
    }
    ctx->lastfill += r;
    n -= r;
  } while(!ctx->eof && n > MIN_READ_SIZE);

  return 0;
}
/* Two macros that break function calling behaviour, but are damn convenient.
 * Both expand to a conditional "return 1" from the function they are used in,
 * so they can only be used inside functions returning int.
 *
 * E(cond, msg): if cond is true and dir_fatalerr is not set yet, record msg
 * (prefixed with the current line and byte position) with dir_seterr() and
 * return 1. If dir_fatalerr is already set, E() does nothing and execution
 * simply continues after it. */
#define E(_x, _m) do {\
if((_x) && !dir_fatalerr) {\
dir_seterr("Line %d byte %d: %s", ctx->line, ctx->byte, _m);\
return 1;\
}\
} while(0)
/* C(expr): return 1 from the enclosing function if expr is non-zero. Used to
 * propagate a failure of another parser function to the caller. */
#define C(_x) do {\
if(_x)\
return 1;\
} while(0)
/* rfill(n) / rfill1: make sure that at least n (or 1) unread bytes are
 * available in the buffer. Both evaluate to non-zero on failure, which
 * includes hitting EOF before enough data could be read. The macros test the
 * common "already enough data" case inline and only call _rfill() when the
 * buffer actually has to be refilled. */
#define rfill1 (*ctx->buf == 0 && _rfill(1))
#define rfill(_n) (((_n) > ctx->lastfill - ctx->buf) && _rfill(_n))
static int _rfill(int n) {
  if(fill(n))
    return 1;
  /* fill() also succeeds on EOF, so check that we really got enough. */
  E(ctx->lastfill - ctx->buf < n, "Unexpected EOF");
  return 0;
}
/* Advances the read position by n bytes and keeps the byte counter used in
 * error messages in sync. Does not check that n bytes are available. */
static inline void con(int n) {
  ctx->byte += n;
  ctx->buf += n;
}
/* Skips over any whitespace (space, tab, CR, LF), refilling the buffer as
 * needed. If *ctx->buf == 0 after a successful call, we've reached EOF.
 * Returns non-zero if refilling the buffer failed. */
static int cons() {
  char c;

  for(;;) {
    if(!*ctx->buf && fill(1))
      return 1;

    c = *ctx->buf;
    if(c == 0x0A) {
      /* The newline is handled here rather than in con(): this is the only
       * function that can consume one, so the line/byte bookkeeping does not
       * have to be paid for on every other consumed byte. */
      ctx->buf++;
      ctx->line++;
      ctx->byte = 0;
    } else if(c == 0x20 || c == 0x09 || c == 0x0D)
      con(1);
    else
      return 0;
  }
}
static int rstring_esc(char **dest, int *destlen) {
unsigned int n;
#define ap(c) if(*destlen > 1) { *((*dest)++) = c; (*destlen)--; }
switch(*ctx->buf) {
case '"': ap('"'); con(1); break;
case '\\': ap('\\'); con(1); break;
case '/': ap('/'); con(1); break;
case 'b': ap(0x08); con(1); break;
case 'f': ap(0x0C); con(1); break;
case 'n': ap(0x0A); con(1); break;
case 'r': ap(0x0D); con(1); break;
case 't': ap(0x09); con(1); break;
case 'u':
C(rfill(5));
#define hn(n) (n >= '0' && n <= '9' ? n-'0' : n >= 'A' && n <= 'F' ? n-'A'+10 : n >= 'a' && n <= 'f' ? n-'a'+10 : 1<<16)
n = (hn(ctx->buf[1])<<12) + (hn(ctx->buf[2])<<8) + (hn(ctx->buf[3])<<4) + hn(ctx->buf[4]);
#undef hn
if(n <= 0x007F) {
ap(n);
} else if(n <= 0x07FF) {
ap(0xC0 | (n>>6));
ap(0x80 | (n & 0x3F));
} else if(n <= 0xFFFF) {
ap(0xE0 | (n>>12));
ap(0x80 | ((n>>6) & 0x3F));
ap(0x80 | (n & 0x3F));
} else // this happens if there was an invalid character (n >= (1<<16))
E(1, "Invalid character in \\u escape");
con(5);
break;
default:
E(1, "Invalid escape sequence");
}
#undef ap
return 0;
}
/* Parses a JSON string and stores its (unescaped) contents in dest, which has
 * room for destlen bytes. At most destlen-1 bytes are stored; anything beyond
 * that is consumed from the input but otherwise ignored. The result is
 * zero-terminated whenever destlen > 0.
 *
 * If the string is destlen-1 bytes or longer, the terminator ends up in
 * dest[destlen-1]. For shorter strings that byte is left untouched, so a
 * caller can put a sentinel there to detect strings that were (or may have
 * been) cut off.
 *
 * Pass destlen = 0 (dest may then be NULL) to only validate and skip the
 * string. Returns 0 on success, non-zero on error. */
static int rstring(char *dest, int destlen) {
  unsigned char ch;

  C(rfill1);
  E(*ctx->buf != '"', "Expected string");
  con(1);

  for(;;) {
    C(rfill1);
    ch = (unsigned char)*ctx->buf;

    if(ch == '"')
      break;

    if(ch == '\\') {
      con(1);
      C(rstring_esc(&dest, &destlen));
      continue;
    }

    /* Control characters must be escaped in JSON strings */
    E(ch <= 0x1F || ch == 0x7F, "Invalid character");
    if(destlen > 1) {
      *dest = *ctx->buf;
      dest++;
      destlen--;
    }
    con(1);
  }

  con(1); /* closing quote */
  if(destlen > 0)
    *dest = 0;
  return 0;
}
/* Parse and consume a non-negative JSON integer and store it in *val. Throws
 * an error if there is no digit at the current position, if the value does
 * not fit in an uint64_t or if it is larger than 'max'.
 *
 * A leading '0' is taken to be the complete number (value 0); any digits
 * following it are left in the buffer for the caller to trip over.
 *
 * Returns 0 on success, non-zero on error. */
static int rint64(uint64_t *val, uint64_t max) {
  unsigned int digit;
  int haschar = 0;

  *val = 0;
  while(1) {
    C(!*ctx->buf && fill(1));
    if(*ctx->buf == '0' && !haschar) {
      con(1);
      break;
    }
    if(*ctx->buf >= '0' && *ctx->buf <= '9') {
      haschar = 1;
      digit = *ctx->buf-'0';
      /* Check for overflow *before* multiplying. Comparing the wrapped
       * result against the old value is not sufficient: val*10 can wrap
       * around and still end up larger than val. */
      E(*val > (UINT64_MAX - digit) / 10, "Invalid (positive) integer");
      *val = (*val)*10 + digit;
      con(1);
      continue;
    }
    E(!haschar, "Invalid (positive) integer");
    break;
  }
  E(*val > max, "Integer out of range");
  return 0;
}
/* Parse and consume a JSON number. The result is discarded.
 *
 * This simply swallows a run of characters that may occur in a JSON number
 * (digits, sign, decimal point and exponent marker) and requires that run to
 * be non-empty; it does not check that they form a well-formed number.
 * TODO: Improve validation.
 *
 * Returns 0 on success, non-zero on error. */
static int rnum() {
  int haschar = 0;

  C(rfill1);
  while(1) {
    C(!*ctx->buf && fill(1));
    switch(*ctx->buf) {
    case '.': /* fraction separator; without it "1.5" can't be consumed */
    case 'e': case 'E':
    case '-': case '+':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      haschar = 1;
      con(1);
      continue;
    default:
      break;
    }
    break;
  }
  E(!haschar, "Invalid JSON value");
  return 0;
}
/* Consumes the literal v (of length len, e.g. "true") from the input. Throws
 * an error if the input does not match. Returns 0 on success, non-zero on
 * error. */
static int rlit(const char *v, int len) {
  if(rfill(len))
    return 1;
  E(strncmp(v, ctx->buf, len), "Invalid JSON value");
  con(len);
  return 0;
}
/* Parses the "<space> <string> <space> : <space>" part of an object member.
 * The key is stored in dest following the rules of rstring(). On success the
 * read position is at the first byte of the member's value. Returns 0 on
 * success, non-zero on error. */
static int rkey(char *dest, int destlen) {
  C(cons());
  C(rstring(dest, destlen));
  C(cons());
  E(*ctx->buf != ':', "Expected ':'");
  con(1);
  return cons();
}
/* (Recursively) parse and consume any JSON value. The result is discarded.
 *
 * The value type is determined from its first byte; anything that is not a
 * literal, string, object or array is assumed to be a number. Objects and
 * arrays may be empty.
 *
 * Note that this function recurses for every nested object/array and does
 * not limit the nesting depth.
 *
 * Returns 0 on success, non-zero on error. */
static int rval() {
  C(rfill1);
  switch(*ctx->buf) {
  case 't': /* true */
    C(rlit("true", 4));
    break;
  case 'f': /* false */
    C(rlit("false", 5));
    break;
  case 'n': /* null */
    C(rlit("null", 4));
    break;
  case '"': /* string */
    C(rstring(NULL, 0));
    break;
  case '{': /* object */
    con(1);
    C(cons());
    if(*ctx->buf == '}') { /* empty object */
      con(1);
      break;
    }
    while(1) {
      C(rkey(NULL, 0) || rval() || cons());
      if(*ctx->buf == '}')
        break;
      E(*ctx->buf != ',', "Expected ',' or '}'");
      con(1);
    }
    con(1);
    break;
  case '[': /* array */
    con(1);
    C(cons());
    if(*ctx->buf == ']') { /* empty array */
      con(1);
      break;
    }
    while(1) {
      C(cons() || rval() || cons());
      if(*ctx->buf == ']')
        break;
      E(*ctx->buf != ',', "Expected ',' or ']'");
      con(1);
    }
    con(1);
    break;
  default: /* assume number */
    C(rnum());
    break;
  }
  return 0;
}
/* Consumes everything up to the root item: the opening '[' of the file, the
 * major and minor format version and the metadata block. Also checks that
 * the root item is a directory, but leaves its opening '[' unconsumed.
 * Returns 0 on success, non-zero on error. */
static int header() {
  uint64_t major, minor;

/* Local helper: require and consume a ',' at the current position. */
#define comma() do {\
    E(*ctx->buf != ',', "Expected ','");\
    con(1);\
  } while(0)

  C(cons());
  E(*ctx->buf != '[', "Expected JSON array");
  con(1);

  /* Major version, this must be one we know */
  C(cons());
  C(rint64(&major, 10000));
  C(cons());
  E(major != 1, "Incompatible major format version");
  comma();

  /* Ignore the minor version for now */
  C(cons());
  C(rint64(&minor, 10000));
  C(cons());
  comma();

  /* Metadata block is currently ignored */
  C(cons());
  C(rval());
  C(cons());
  comma();

  C(cons());
  E(*ctx->buf != '[', "Top-level item must be a directory");
#undef comma
  return 0;
}
static int item(uint64_t);

/* Reads and adds the contents of a directory: zero or more ",<item>"
 * sequences followed by the closing ']'. The directory's own information
 * object must already have been consumed. 'dev' is the device number of the
 * directory, which is passed on to its children.
 * Returns 0 on success, non-zero on error or when reading an item was
 * aborted. */
static int itemdir(uint64_t dev) {
  while(1) {
    C(cons());
    if(*ctx->buf == ']')
      break;
    E(*ctx->buf != ',', "Expected ',' or ']'");
    con(1);
    C(cons());
    /* Stop right away if a child item fails; ignoring its return value would
     * have us continue parsing from an arbitrary position in the input. */
    C(item(dev));
  }
  con(1);
  C(cons());
  return 0;
}
static int iteminfo(struct dir **item, uint64_t dev, int isdir) {
static struct dir dir;
struct dir *tmp, *d = &dir;
uint64_t iv;
memset(d, 0, sizeof(struct dir));
d->flags |= isdir ? FF_DIR : FF_FILE;
d->dev = dev;
E(*ctx->buf != '{', "Expected JSON object");
con(1);
while(1) {
C(rkey(ctx->val, MAX_VAL));
/* TODO: strcmp() in this fashion isn't very fast. */
if(strcmp(ctx->val, "name") == 0) { /* name */
ctx->val[MAX_VAL-1] = 1;
C(rstring(ctx->val, MAX_VAL));
E(ctx->val[MAX_VAL-1] != 1, "Too large string value");
tmp = dir_createstruct(ctx->val);
memcpy(tmp, d, SDIRSIZE-1);
d = tmp;
} else if(strcmp(ctx->val, "asize") == 0) { /* asize */
C(rint64(&iv, INT64_MAX));
d->asize = iv;
} else if(strcmp(ctx->val, "dsize") == 0) { /* dsize */
C(rint64(&iv, INT64_MAX));
d->size = iv;
} else if(strcmp(ctx->val, "dev") == 0) { /* dev */
C(rint64(&iv, UINT64_MAX));
d->dev = iv;
} else if(strcmp(ctx->val, "ino") == 0) { /* ino */
C(rint64(&iv, UINT64_MAX));
d->ino = iv;
} else if(strcmp(ctx->val, "hlnkc") == 0) { /* hlnkc */
if(*ctx->buf == 't') {
C(rlit("true", 4));
d->flags |= FF_HLNKC;
} else
C(rlit("false", 5));
} else if(strcmp(ctx->val, "read_error") == 0) { /* read_error */
if(*ctx->buf == 't') {
C(rlit("true", 4));
d->flags |= FF_ERR;
} else
C(rlit("false", 5));
} else if(strcmp(ctx->val, "excluded") == 0) { /* excluded */
C(rstring(ctx->val, 8));
if(strcmp(ctx->val, "otherfs") == 0)
d->flags |= FF_OTHFS;
else
d->flags |= FF_EXL;
} else if(strcmp(ctx->val, "notreg") == 0) { /* notreg */
if(*ctx->buf == 't') {
C(rlit("true", 4));
d->flags &= ~FF_FILE;
} else
C(rlit("false", 5));
} else
C(rval());
C(cons());
if(*ctx->buf == '}')
break;
E(*ctx->buf != ',', "Expected ',' or '}'");
con(1);
}
con(1);
E(!*d->name, "No name field present in item information object");
*item = d;
ctx->items++;
return input_handle(1);
}
/* Recursively reads a file or directory item and passes it on to the output
 * functions. An item starting with '[' is a directory: its information
 * object is followed by its children. 'dev' is the device number of the
 * parent. The first item read is the root item; its name replaces the
 * current path instead of being appended to it.
 * Returns 0 on success, non-zero on error or abort. */
static int item(uint64_t dev) {
  struct dir *d = NULL;
  int isroot = ctx->items == 0;
  int isdir = *ctx->buf == '[';

  if(isdir) {
    con(1);
    C(cons());
  }

  C(iteminfo(&d, dev, isdir));
  dev = d->dev;

  if(isroot)
    dir_curpath_set(d->name);
  else
    dir_curpath_enter(d->name);

  if(dir_output.item(d))
    goto output_error;
  if(isdir) {
    C(itemdir(dev));
    /* A NULL item is passed to the output once a directory's children have
     * all been read. */
    if(dir_output.item(NULL))
      goto output_error;
  }

  if(isroot)
    /* The root item must not be empty. */
    E(ctx->items <= 1, "Empty directory");
  else
    dir_curpath_leave();
  return 0;

output_error:
  dir_seterr("Output error: %s", strerror(errno));
  return 1;
}
/* Consumes the ']' that closes the top-level array and verifies that nothing
 * but whitespace follows it. Returns 0 on success, non-zero on error. */
static int footer() {
  if(cons())
    return 1;
  E(*ctx->buf != ']', "Expected ']'");
  con(1);

  if(cons())
    return 1;
  /* After cons(), a zero byte means EOF; anything else is extra data. */
  E(*ctx->buf != 0, "Trailing garbage");
  return 0;
}
/* Runs the complete import: header, root item (recursively) and footer, then
 * closes the stream and releases the parser state. Installed as dir_process
 * by dir_import_init(). Returns dir_output.final(). */
static int process() {
  int fail = 0;

  /* Parse errors are recorded in dir_fatalerr, so the return values of
   * header() and footer() carry no extra information. */
  (void)header();

  if(!dir_fatalerr)
    fail = item(0);

  /* item() can fail without having set a fatal error (it passes on a non-zero
   * return of input_handle(), for instance). The read position is then
   * somewhere in the middle of the data, so checking the footer would only
   * produce a bogus parse error. */
  if(!dir_fatalerr && !fail)
    footer();

  if(fclose(ctx->stream) && !dir_fatalerr && !fail)
    dir_seterr("Error closing file: %s", strerror(errno));
  free(ctx);
  ctx = NULL;

  /* NOTE(review): presumably this keeps handling input until the user has
   * acknowledged the error; confirm against input_handle(). */
  while(dir_fatalerr && !input_handle(0))
    ;
  return dir_output.final(dir_fatalerr || fail);
}
/* Prepares importing from the file 'fn' ("-" for standard input) and
 * installs the importer as dir_process. The actual reading happens when
 * dir_process() is called.
 * Returns 0 on success. Returns non-zero if the file could not be opened or
 * the parser state could not be allocated; errno describes the problem. */
int dir_import_init(const char *fn) {
  FILE *stream;

  if(strcmp(fn, "-") == 0)
    stream = stdin;
  else if((stream = fopen(fn, "r")) == NULL)
    return 1;

  ctx = malloc(sizeof *ctx);
  if(ctx == NULL) {
    if(stream != stdin)
      fclose(stream);
    /* Set errno after fclose(), as that may overwrite it; the caller reports
     * strerror(errno) on failure. */
    errno = ENOMEM;
    return 1;
  }
  ctx->stream = stream;
  ctx->line = 1;
  ctx->byte = ctx->eof = ctx->items = 0;
  /* Start with an empty, zero-terminated read buffer */
  ctx->buf = ctx->lastfill = ctx->readbuf;
  ctx->readbuf[0] = 0;

  dir_curpath_set(fn);
  dir_process = process;
  return 0;
}

Просмотреть файл

@ -234,8 +234,7 @@ static int dir_walk(char *dir) {
}
/* Returns 0 to continue running ncdu, 1 to quit. */
int dir_scan_process() {
static int process() {
char *path;
char *dir;
int fail = 0;
@ -296,5 +295,6 @@ void dir_scan_init(const char *path) {
dir_curpath_set(path);
dir_setlasterr(NULL);
dir_seterr(NULL);
dir_process = process;
pstate = ST_CALC;
}

Просмотреть файл

@ -102,9 +102,10 @@ int input_handle(int wait) {
/* parse command line */
static char *argv_parse(int argc, char **argv) {
static void argv_parse(int argc, char **argv) {
int i, j, len;
char *export = NULL;
char *import = NULL;
char *dir = NULL;
dir_ui = -1;
@ -112,7 +113,7 @@ static char *argv_parse(int argc, char **argv) {
for(i=1; i<argc; i++) {
if(argv[i][0] == '-') {
/* flags requiring arguments */
if(!strcmp(argv[i], "-X") || !strcmp(argv[i], "-u") || !strcmp(argv[i], "-o")
if(!strcmp(argv[i], "-X") || !strcmp(argv[i], "-u") || !strcmp(argv[i], "-o") || !strcmp(argv[i], "-f")
|| !strcmp(argv[i], "--exclude-from") || !strcmp(argv[i], "--exclude")) {
if(i+1 >= argc) {
printf("Option %s requires an argument\n", argv[i]);
@ -126,6 +127,8 @@ static char *argv_parse(int argc, char **argv) {
dir_ui = argv[i][0]-'0';
} else if(strcmp(argv[i], "-o") == 0)
export = argv[++i];
else if(strcmp(argv[i], "-f") == 0)
import = argv[++i];
else if(strcmp(argv[i], "--exclude") == 0)
exclude_add(argv[++i]);
else if(exclude_addfile(argv[++i])) {
@ -150,6 +153,7 @@ static char *argv_parse(int argc, char **argv) {
printf(" -x Same filesystem\n");
printf(" -r Read only\n");
printf(" -o FILE Export scanned directory to FILE\n");
printf(" -f FILE Import scanned directory from FILE\n");
printf(" -u <0-2> UI to use when scanning (0=minimal,2=verbose)\n");
printf(" --exclude PATTERN Exclude files that match PATTERN\n");
printf(" -X, --exclude-from FILE Exclude files that match any pattern in FILE\n");
@ -175,12 +179,20 @@ static char *argv_parse(int argc, char **argv) {
} else
dir_mem_init(NULL);
if(import) {
if(dir_import_init(import)) {
printf("Can't open %s: %s\n", import, strerror(errno));
exit(1);
}
if(strcmp(import, "-") == 0)
ncurses_tty = 1;
} else
dir_scan_init(dir ? dir : ".");
/* Use the single-line scan feedback by default when exporting to file, no
* feedback when exporting to stdout. */
if(dir_ui == -1)
dir_ui = export && strcmp(export, "-") == 0 ? 0 : export ? 1 : 2;
return dir;
}
@ -223,14 +235,8 @@ static void init_nc() {
/* main program */
int main(int argc, char **argv) {
char *dir;
setlocale(LC_ALL, "");
if((dir = argv_parse(argc, argv)) == NULL)
dir = ".";
dir_scan_init(dir);
argv_parse(argc, argv);
if(dir_ui == 2)
init_nc();
@ -245,7 +251,7 @@ int main(int argc, char **argv) {
}
if(pstate == ST_CALC) {
if(dir_scan_process()) {
if(dir_process()) {
if(dir_ui == 1)
fputc('\n', stderr);
break;