Subversion Repositories HelenOS

Rev

Rev 3403 | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

/*
 * Copyright (c) 2008 Jakub Jermar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup libc 
 * @{
 */ 

/**
 * @file
 * @brief Canonification of absolute file system paths.
 */

#include <stdlib.h>
#include <vfs/canonify.h>

/** Kinds of tokens recognized while scanning a path.
 *
 * The numeric values are used as indices into the second dimension of
 * the trans[] table, hence they are spelled out explicitly.
 */
typedef enum {
    TK_INVALID = 0, /* not produced by slash_token() or next_token() */
    TK_SLASH = 1,   /* a single '/' */
    TK_DOT = 2,     /* a "." component */
    TK_DOTDOT = 3,  /* a ".." component */
    TK_COMP = 4,    /* any other path component */
    TK_NUL = 5      /* the terminating '\0' */
} tokval_t;

/** A token located inside the path buffer.
 *
 * Both pointers refer to characters of the path being processed;
 * the range is inclusive, so a one-character token has start == stop.
 */
typedef struct {
    tokval_t kind;  /* type of the token */
    char *start;    /* first character of the token */
    char *stop;     /* last character of the token (inclusive) */
} token_t;

/** Fake up the TK_SLASH token. */
static token_t slash_token(char *start)
{
    token_t ret;
    ret.kind = TK_SLASH;
    ret.start = start;
    ret.stop = start;
    return ret;
}

/** Classify the token that immediately follows cur.
 *
 * Scanning starts at the character right after cur->stop, so that
 * character must be readable. In particular, cur must not be the
 * TK_NUL token of the string.
 *
 * @param cur       The current token.
 *
 * @return      The next token; its start and stop delimit it
 *          inclusively within the path.
 */
static token_t next_token(token_t *cur)
{
    char *p = cur->stop + 1;
    token_t next;

    /* Every token starts at p; single-character tokens also end there. */
    next.start = p;
    next.stop = p;

    if (p[0] == '\0') {
        next.kind = TK_NUL;
    } else if (p[0] == '/') {
        next.kind = TK_SLASH;
    } else if (p[0] == '.' && (p[1] == '\0' || p[1] == '/')) {
        next.kind = TK_DOT;
    } else if (p[0] == '.' && p[1] == '.' &&
        (p[2] == '\0' || p[2] == '/')) {
        next.kind = TK_DOTDOT;
        next.stop = p + 1;
    } else {
        /* Ordinary component: extend up to the next '/' or the end. */
        char *last = p;

        while (last[1] != '\0' && last[1] != '/')
            last++;
        next.kind = TK_COMP;
        next.stop = last;
    }

    return next;
}

/** States of the canonify() automaton.
 *
 * S_INI through S_C are working states and index the first dimension of
 * the trans[] table. S_ACCEPT, S_RESTART and S_REJECT are terminal for a
 * single pass over the path and have no row in the table.
 */
typedef enum {
    S_INI = 0,      /* initial state, before the leading slash */
    S_A = 1,        /* after a leading slash, no component seen yet */
    S_B = 2,        /* right after a path component */
    S_C = 3,        /* right after a slash that follows a component */
    S_ACCEPT = 4,   /* the path is canonical now */
    S_RESTART = 5,  /* the path was modified, scan it again */
    S_REJECT = 6    /* the path cannot be canonified */
} state_t;

/** One entry of the transition table.
 *
 * canonify() first calls f (when it is not NULL) with the current token,
 * the first-slash token and the last-component token, and then switches
 * to state s.
 */
typedef struct {
    state_t s;  /* state to enter */
    void (* f)(token_t *, token_t *, token_t *);    /* optional action */
} change_state_t;

/*
 * Actions that can be performed when transitioning from one
 * state of canonify() to another.
 */
/** Remember the current slash as the beginning of the resulting path.
 *
 * The last component is reset to the same token.
 *
 * @param t     The current TK_SLASH token.
 * @param tfsl      Receives a copy of t (the first slash).
 * @param tlcomp    Receives a copy of t (the last component).
 */
static void set_first_slash(token_t *t, token_t *tfsl, token_t *tlcomp)
{
    const token_t slash = *t;

    *tfsl = slash;
    *tlcomp = slash;
}
/** Remember the current token as the last component seen.
 *
 * @param t     The current TK_COMP token.
 * @param tfsl      Unused.
 * @param tlcomp    Receives a copy of t.
 */
static void save_component(token_t *t, token_t *tfsl, token_t *tlcomp)
{
    (void) tfsl;

    *tlcomp = *t;
}
/** Cut the path right after the first slash.
 *
 * @param t     Unused.
 * @param tfsl      The first slash token.
 * @param tlcomp    Unused.
 */
static void terminate_slash(token_t *t, token_t *tfsl, token_t *tlcomp)
{
    char *after = tfsl->stop + 1;

    (void) t;
    (void) tlcomp;

    /*
     * Store only when something follows the slash, so that an already
     * well-formatted path is never written to.
     */
    if (*after != '\0')
        *after = '\0';
}
/** Overwrite the character preceding the current token with '\0'.
 *
 * Used when the end of the string directly follows a slash, which
 * removes that trailing slash.
 *
 * @param t     The current TK_NUL token.
 * @param tfsl      Unused.
 * @param tlcomp    Unused.
 */
static void remove_trailing_slash(token_t *t, token_t *tfsl, token_t *tlcomp)
{
    char *slash = t->start - 1;

    (void) tfsl;
    (void) tlcomp;

    *slash = '\0';
}
/** Eat the extra '/'.
 *
 * The rest of the string, including its terminator, is moved one
 * character to the left so that it overwrites the current token.
 *
 * @param t     The current TK_SLASH token.
 * @param tfsl      Unused.
 * @param tlcomp    Unused.
 */
static void shift_slash(token_t *t, token_t *tfsl, token_t *tlcomp)
{
    char *dst = t->start;
    const char *src = t->stop + 1;

    (void) tfsl;
    (void) tlcomp;

    /* Forward copy; dst is below src, so the overlap is harmless. */
    do {
        *dst = *src++;
    } while (*dst++ != '\0');
}
/** Eat the extra '.'.
 *
 * The rest of the string, including its terminator, is moved to the
 * left so that it overwrites the current token.
 *
 * @param t     The current TK_DOT token.
 * @param tfsl      Unused.
 * @param tlcomp    Unused.
 */
static void shift_dot(token_t *t, token_t *tfsl, token_t *tlcomp)
{
    char *dst = t->start;
    const char *src = t->stop + 1;

    (void) tfsl;
    (void) tlcomp;

    /* Forward copy; dst is below src, so the overlap is harmless. */
    do {
        *dst = *src++;
    } while (*dst++ != '\0');
}
/** Collapse the TK_COMP TK_SLASH TK_DOTDOT pattern.
 *
 * Everything that follows the ".." token, including the string
 * terminator, is moved to where the last component starts.
 *
 * @param t     The current TK_DOTDOT token.
 * @param tfsl      Unused.
 * @param tlcomp    The last TK_COMP token.
 */
static void shift_dotdot(token_t *t, token_t *tfsl, token_t *tlcomp)
{
    char *dst = tlcomp->start;
    const char *src = t->stop + 1;

    (void) tfsl;

    /* Forward copy; dst is below src, so the overlap is harmless. */
    do {
        *dst = *src++;
    } while (*dst++ != '\0');
}

/** Transition function for canonify(). */
static change_state_t trans[4][6] = {
    [S_INI] = {
        [TK_SLASH] = {
            .s = S_A,
            .f = set_first_slash,
        },
        [TK_DOT] = {
            .s = S_REJECT,
            .f = NULL,
        },
        [TK_DOTDOT] = {
            .s = S_REJECT,
            .f = NULL,
        },
        [TK_COMP] = {
            .s = S_REJECT,
            .f = NULL,
        },
        [TK_NUL] = {
            .s = S_REJECT,
            .f = NULL,
        },
        [TK_INVALID] = {
            .s = S_REJECT,
            .f = NULL,
        },
    },
    [S_A] = {
        [TK_SLASH] = {
            .s = S_A,
            .f = set_first_slash,
        },
        [TK_DOT] = {
            .s = S_A,
            .f = NULL,
        },
        [TK_DOTDOT] = {
            .s = S_A,
            .f = NULL,
        },
        [TK_COMP] = {
            .s = S_B,
            .f = save_component,
        },
        [TK_NUL] = {
            .s = S_ACCEPT,
            .f = terminate_slash,
        },
        [TK_INVALID] = {
            .s = S_REJECT,
            .f = NULL,
        },
    },
    [S_B] = {
        [TK_SLASH] = {
            .s = S_C,
            .f = NULL,
        },
        [TK_DOT] = {
            .s = S_REJECT,
            .f = NULL,
        },
        [TK_DOTDOT] = {
            .s = S_REJECT,
            .f = NULL,
        },
        [TK_COMP] = {
            .s = S_REJECT,
            .f = NULL,
        },
        [TK_NUL] = {
            .s = S_ACCEPT,
            .f = NULL,
        },
        [TK_INVALID] = {
            .s = S_REJECT,
            .f = NULL,
        },
    },
    [S_C] = {
        [TK_SLASH] = {
            .s = S_RESTART,
            .f = shift_slash,
        },
        [TK_DOT] = {
            .s = S_RESTART,
            .f = shift_dot,
        },
        [TK_DOTDOT] = {
            .s = S_RESTART,
            .f = shift_dotdot,
        },
        [TK_COMP] = {
            .s = S_B,
            .f = save_component,
        },
        [TK_NUL] = {
            .s = S_ACCEPT,
            .f = remove_trailing_slash,
        },
        [TK_INVALID] = {
            .s = S_REJECT,
            .f = NULL,
        },
    }
};

/** Canonify a file system path.
 *
 * A file system path is canonical, if the following holds:
 * 1) the path is absolute (i.e. a/b/c is not canonical)
 * 2) there is no trailing slash in the path (i.e. /a/b/c is not canonical)
 * 3) there is no extra slash in the path (i.e. /a//b/c is not canonical)
 * 4) there is no '.' component in the path (i.e. /a/./b/c is not canonical)
 * 5) there is no '..' component in the path (i.e. /a/b/../c is not canonical) 
 *
 * This function makes a potentially non-canonical file system path canonical.
 * It works in-place and requires a NULL-terminated input string.
 *
 * @param path      Path to be canonified.
 * @param lenp      Pointer where the length of the final path will be
 *          stored. Can be NULL.
 *
 * @return      Canonified path or NULL on failure.
 */
char *canonify(char *path, size_t *lenp)
{
    state_t state;
    token_t t;
    token_t tfsl;       /* first slash */
    token_t tlcomp;     /* last component */
    if (*path != '/')
        return NULL;
    tfsl = slash_token(path);
restart:
    state = S_INI;
    t = tfsl;
    tlcomp = tfsl;
    while (state != S_ACCEPT && state != S_RESTART && state != S_REJECT) {
        if (trans[state][t.kind].f)
            trans[state][t.kind].f(&t, &tfsl, &tlcomp);
        state = trans[state][t.kind].s;
        t = next_token(&t);
    }
    
    switch (state) {
    case S_RESTART:
        goto restart;
    case S_REJECT:
        return NULL;
    case S_ACCEPT:
        if (lenp)
            *lenp = (size_t)((tlcomp.stop - tfsl.start) + 1);
        return tfsl.start; 
    default:
        abort();
    }
}

/**
 * @}
 */