Rev 2753 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
2753 | jermar | 1 | /* |
2 | * Copyright (c) 2008 Jakub Jermar |
||
3 | * All rights reserved. |
||
4 | * |
||
5 | * Redistribution and use in source and binary forms, with or without |
||
6 | * modification, are permitted provided that the following conditions |
||
7 | * are met: |
||
8 | * |
||
9 | * - Redistributions of source code must retain the above copyright |
||
10 | * notice, this list of conditions and the following disclaimer. |
||
11 | * - Redistributions in binary form must reproduce the above copyright |
||
12 | * notice, this list of conditions and the following disclaimer in the |
||
13 | * documentation and/or other materials provided with the distribution. |
||
14 | * - The name of the author may not be used to endorse or promote products |
||
15 | * derived from this software without specific prior written permission. |
||
16 | * |
||
17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
||
18 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||
19 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
||
20 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
||
21 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||
22 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
||
23 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
||
24 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
||
25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
||
26 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||
27 | */ |
||
28 | |||
29 | /** @addtogroup libc |
||
30 | * @{ |
||
31 | */ |
||
32 | |||
33 | /** |
||
34 | * @file |
||
35 | * @brief |
||
36 | */ |
||
37 | |||
38 | #include <stdlib.h> |
||
39 | |||
40 | /** Token types used for tokenization of path. */ |
||
41 | typedef enum { |
||
42 | TK_INVALID, |
||
43 | TK_SLASH, |
||
44 | TK_DOT, |
||
45 | TK_DOTDOT, |
||
46 | TK_COMP, |
||
47 | TK_NUL |
||
48 | } tokval_t; |
||
49 | |||
50 | typedef struct { |
||
51 | tokval_t kind; |
||
52 | char *start; |
||
53 | char *stop; |
||
54 | } token_t; |
||
55 | |||
56 | /** Fake up the TK_SLASH token. */ |
||
57 | static token_t slash_token(char *start) |
||
58 | { |
||
59 | token_t ret; |
||
60 | ret.kind = TK_SLASH; |
||
61 | ret.start = start; |
||
62 | ret.stop = start; |
||
63 | return ret; |
||
64 | } |
||
65 | |||
66 | /** Given a token, return the next token. */ |
||
67 | static token_t next_token(token_t *cur) |
||
68 | { |
||
69 | token_t ret; |
||
70 | |||
71 | if (cur->stop[1] == '\0') { |
||
72 | ret.kind = TK_NUL; |
||
73 | ret.start = cur->stop + 1; |
||
74 | ret.stop = ret.start; |
||
75 | return ret; |
||
76 | } |
||
77 | if (cur->stop[1] == '/') { |
||
78 | ret.kind = TK_SLASH; |
||
79 | ret.start = cur->stop + 1; |
||
80 | ret.stop = ret.start; |
||
81 | return ret; |
||
82 | } |
||
83 | if (cur->stop[1] == '.' && (!cur->stop[2] || cur->stop[2] == '/')) { |
||
84 | ret.kind = TK_DOT; |
||
85 | ret.start = cur->stop + 1; |
||
86 | ret.stop = ret.start; |
||
87 | return ret; |
||
88 | } |
||
89 | if (cur->stop[1] == '.' && cur->stop[2] == '.' && |
||
90 | (!cur->stop[3] || cur->stop[3] == '/')) { |
||
91 | ret.kind = TK_DOTDOT; |
||
92 | ret.start = cur->stop + 1; |
||
93 | ret.stop = cur->stop + 2; |
||
94 | return ret; |
||
95 | } |
||
96 | unsigned i; |
||
97 | for (i = 1; cur->stop[i] && cur->stop[i] != '/'; i++) |
||
98 | ; |
||
99 | ret.kind = TK_COMP; |
||
100 | ret.start = &cur->stop[1]; |
||
101 | ret.stop = &cur->stop[i - 1]; |
||
102 | return ret; |
||
103 | } |
||
104 | |||
105 | /** States used by canonify(). */ |
||
106 | typedef enum { |
||
107 | S_INI, |
||
108 | S_A, |
||
109 | S_B, |
||
110 | S_C, |
||
111 | S_ACCEPT, |
||
112 | S_RESTART, |
||
113 | S_REJECT |
||
114 | } state_t; |
||
115 | |||
116 | typedef struct { |
||
117 | state_t s; |
||
118 | void (* f)(token_t *, token_t *, token_t *); |
||
119 | } change_state_t; |
||
120 | |||
121 | /* |
||
122 | * Actions that can be performed when transitioning from one |
||
123 | * state of canonify() to another. |
||
124 | */ |
||
125 | static void set_first_slash(token_t *t, token_t *tfsl, token_t *tlcomp) |
||
126 | { |
||
127 | *tfsl = *t; |
||
3331 | jermar | 128 | *tlcomp = *t; |
2753 | jermar | 129 | } |
130 | static void save_component(token_t *t, token_t *tfsl, token_t *tlcomp) |
||
131 | { |
||
132 | *tlcomp = *t; |
||
133 | } |
||
134 | static void terminate_slash(token_t *t, token_t *tfsl, token_t *tlcomp) |
||
135 | { |
||
136 | if (tfsl->stop[1]) /* avoid writing to a well-formatted path */ |
||
137 | tfsl->stop[1] = '\0'; |
||
138 | } |
||
139 | static void remove_trailing_slash(token_t *t, token_t *tfsl, token_t *tlcomp) |
||
140 | { |
||
141 | t->start[-1] = '\0'; |
||
142 | } |
||
143 | /** Eat the extra '/'.. |
||
144 | * |
||
145 | * @param t The current TK_SLASH token. |
||
146 | */ |
||
147 | static void shift_slash(token_t *t, token_t *tfsl, token_t *tlcomp) |
||
148 | { |
||
149 | char *p = t->start; |
||
150 | char *q = t->stop + 1; |
||
151 | while ((*p++ = *q++)) |
||
152 | ; |
||
153 | } |
||
154 | /** Eat the extra '.'. |
||
155 | * |
||
156 | * @param t The current TK_DOT token. |
||
157 | */ |
||
158 | static void shift_dot(token_t *t, token_t *tfsl, token_t *tlcomp) |
||
159 | { |
||
160 | char *p = t->start; |
||
161 | char *q = t->stop + 1; |
||
162 | while ((*p++ = *q++)) |
||
163 | ; |
||
164 | } |
||
165 | /** Collapse the TK_COMP TK_SLASH TK_DOTDOT pattern. |
||
166 | * |
||
167 | * @param t The current TK_DOTDOT token. |
||
168 | * @param tlcomp The last TK_COMP token. |
||
169 | */ |
||
170 | static void shift_dotdot(token_t *t, token_t *tfsl, token_t *tlcomp) |
||
171 | { |
||
172 | char *p = tlcomp->start; |
||
173 | char *q = t->stop + 1; |
||
174 | while ((*p++ = *q++)) |
||
175 | ; |
||
176 | } |
||
177 | |||
178 | /** Transition function for canonify(). */ |
||
179 | static change_state_t trans[4][6] = { |
||
180 | [S_INI] = { |
||
181 | [TK_SLASH] = { |
||
182 | .s = S_A, |
||
183 | .f = set_first_slash, |
||
184 | }, |
||
185 | [TK_DOT] = { |
||
186 | .s = S_REJECT, |
||
187 | .f = NULL, |
||
188 | }, |
||
189 | [TK_DOTDOT] = { |
||
190 | .s = S_REJECT, |
||
191 | .f = NULL, |
||
192 | }, |
||
193 | [TK_COMP] = { |
||
194 | .s = S_REJECT, |
||
195 | .f = NULL, |
||
196 | }, |
||
197 | [TK_NUL] = { |
||
198 | .s = S_REJECT, |
||
199 | .f = NULL, |
||
200 | }, |
||
201 | [TK_INVALID] = { |
||
202 | .s = S_REJECT, |
||
203 | .f = NULL, |
||
204 | }, |
||
205 | }, |
||
206 | [S_A] = { |
||
207 | [TK_SLASH] = { |
||
208 | .s = S_A, |
||
209 | .f = set_first_slash, |
||
210 | }, |
||
211 | [TK_DOT] = { |
||
212 | .s = S_A, |
||
213 | .f = NULL, |
||
214 | }, |
||
215 | [TK_DOTDOT] = { |
||
216 | .s = S_A, |
||
217 | .f = NULL, |
||
218 | }, |
||
219 | [TK_COMP] = { |
||
220 | .s = S_B, |
||
221 | .f = save_component, |
||
222 | }, |
||
223 | [TK_NUL] = { |
||
224 | .s = S_ACCEPT, |
||
225 | .f = terminate_slash, |
||
226 | }, |
||
227 | [TK_INVALID] = { |
||
228 | .s = S_REJECT, |
||
229 | .f = NULL, |
||
230 | }, |
||
231 | }, |
||
232 | [S_B] = { |
||
233 | [TK_SLASH] = { |
||
234 | .s = S_C, |
||
235 | .f = NULL, |
||
236 | }, |
||
237 | [TK_DOT] = { |
||
238 | .s = S_REJECT, |
||
239 | .f = NULL, |
||
240 | }, |
||
241 | [TK_DOTDOT] = { |
||
242 | .s = S_REJECT, |
||
243 | .f = NULL, |
||
244 | }, |
||
245 | [TK_COMP] = { |
||
246 | .s = S_REJECT, |
||
247 | .f = NULL, |
||
248 | }, |
||
249 | [TK_NUL] = { |
||
250 | .s = S_ACCEPT, |
||
251 | .f = NULL, |
||
252 | }, |
||
253 | [TK_INVALID] = { |
||
254 | .s = S_REJECT, |
||
255 | .f = NULL, |
||
256 | }, |
||
257 | }, |
||
258 | [S_C] = { |
||
259 | [TK_SLASH] = { |
||
260 | .s = S_RESTART, |
||
261 | .f = shift_slash, |
||
262 | }, |
||
263 | [TK_DOT] = { |
||
264 | .s = S_RESTART, |
||
265 | .f = shift_dot, |
||
266 | }, |
||
267 | [TK_DOTDOT] = { |
||
268 | .s = S_RESTART, |
||
269 | .f = shift_dotdot, |
||
270 | }, |
||
271 | [TK_COMP] = { |
||
272 | .s = S_B, |
||
273 | .f = save_component, |
||
274 | }, |
||
275 | [TK_NUL] = { |
||
276 | .s = S_ACCEPT, |
||
277 | .f = remove_trailing_slash, |
||
278 | }, |
||
279 | [TK_INVALID] = { |
||
280 | .s = S_REJECT, |
||
281 | .f = NULL, |
||
282 | }, |
||
283 | } |
||
284 | }; |
||
285 | |||
286 | /** Canonify a file system path. |
||
287 | * |
||
288 | * A file system path is canonical, if the following holds: |
||
289 | * 1) the path is absolute (i.e. a/b/c is not canonical) |
||
290 | * 2) there is no trailing slash in the path (i.e. /a/b/c is not canonical) |
||
291 | * 3) there is no extra slash in the path (i.e. /a//b/c is not canonical) |
||
292 | * 4) there is no '.' component in the path (i.e. /a/./b/c is not canonical) |
||
293 | * 5) there is no '..' component in the path (i.e. /a/b/../c is not canonical) |
||
294 | * |
||
295 | * This function makes a potentially non-canonical file system path canonical. |
||
296 | * It works in-place and requires a NULL-terminated input string. |
||
297 | * |
||
298 | * @param path Path to be canonified. |
||
299 | * @param lenp Pointer where the length of the final path will be |
||
300 | * stored. Can be NULL. |
||
301 | * |
||
302 | * @return Canonified path or NULL on failure. |
||
303 | */ |
||
304 | char *canonify(char *path, size_t *lenp) |
||
305 | { |
||
306 | state_t state; |
||
307 | token_t t; |
||
308 | token_t tfsl; /* first slash */ |
||
309 | token_t tlcomp; /* last component */ |
||
310 | if (*path != '/') |
||
311 | return NULL; |
||
312 | tfsl = slash_token(path); |
||
313 | restart: |
||
314 | state = S_INI; |
||
315 | t = tfsl; |
||
316 | tlcomp = tfsl; |
||
317 | while (state != S_ACCEPT && state != S_RESTART && state != S_REJECT) { |
||
318 | if (trans[state][t.kind].f) |
||
319 | trans[state][t.kind].f(&t, &tfsl, &tlcomp); |
||
320 | state = trans[state][t.kind].s; |
||
321 | t = next_token(&t); |
||
322 | } |
||
323 | |||
324 | switch (state) { |
||
325 | case S_RESTART: |
||
326 | goto restart; |
||
327 | case S_REJECT: |
||
328 | return NULL; |
||
329 | case S_ACCEPT: |
||
330 | if (lenp) |
||
331 | *lenp = (size_t)((tlcomp.stop - tfsl.start) + 1); |
||
332 | return tfsl.start; |
||
333 | default: |
||
334 | abort(); |
||
335 | } |
||
336 | } |
||
337 | |||
338 | /** |
||
339 | * @} |
||
340 | */ |