Rev 2753 | Rev 3403 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
2753 | jermar | 1 | /* |
2 | * Copyright (c) 2008 Jakub Jermar |
||
3 | * All rights reserved. |
||
4 | * |
||
5 | * Redistribution and use in source and binary forms, with or without |
||
6 | * modification, are permitted provided that the following conditions |
||
7 | * are met: |
||
8 | * |
||
9 | * - Redistributions of source code must retain the above copyright |
||
10 | * notice, this list of conditions and the following disclaimer. |
||
11 | * - Redistributions in binary form must reproduce the above copyright |
||
12 | * notice, this list of conditions and the following disclaimer in the |
||
13 | * documentation and/or other materials provided with the distribution. |
||
14 | * - The name of the author may not be used to endorse or promote products |
||
15 | * derived from this software without specific prior written permission. |
||
16 | * |
||
17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
||
18 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||
19 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
||
20 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
||
21 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||
22 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
||
23 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
||
24 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
||
25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
||
26 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||
27 | */ |
||
28 | |||
29 | /** @addtogroup libc |
||
30 | * @{ |
||
31 | */ |
||
32 | |||
33 | /** |
||
34 | * @file |
||
35 | * @brief |
||
36 | */ |
||
37 | |||
38 | #include <stdlib.h> |
||
39 | |||
40 | /** Token types used for tokenization of path. */ |
||
41 | typedef enum { |
||
42 | TK_INVALID, |
||
43 | TK_SLASH, |
||
44 | TK_DOT, |
||
45 | TK_DOTDOT, |
||
46 | TK_COMP, |
||
47 | TK_NUL |
||
48 | } tokval_t; |
||
49 | |||
50 | typedef struct { |
||
51 | tokval_t kind; |
||
52 | char *start; |
||
53 | char *stop; |
||
54 | } token_t; |
||
55 | |||
56 | /** Fake up the TK_SLASH token. */ |
||
57 | static token_t slash_token(char *start) |
||
58 | { |
||
59 | token_t ret; |
||
60 | ret.kind = TK_SLASH; |
||
61 | ret.start = start; |
||
62 | ret.stop = start; |
||
63 | return ret; |
||
64 | } |
||
65 | |||
66 | /** Given a token, return the next token. */ |
||
67 | static token_t next_token(token_t *cur) |
||
68 | { |
||
69 | token_t ret; |
||
70 | |||
71 | if (cur->stop[1] == '\0') { |
||
72 | ret.kind = TK_NUL; |
||
73 | ret.start = cur->stop + 1; |
||
74 | ret.stop = ret.start; |
||
75 | return ret; |
||
76 | } |
||
77 | if (cur->stop[1] == '/') { |
||
78 | ret.kind = TK_SLASH; |
||
79 | ret.start = cur->stop + 1; |
||
80 | ret.stop = ret.start; |
||
81 | return ret; |
||
82 | } |
||
83 | if (cur->stop[1] == '.' && (!cur->stop[2] || cur->stop[2] == '/')) { |
||
84 | ret.kind = TK_DOT; |
||
85 | ret.start = cur->stop + 1; |
||
86 | ret.stop = ret.start; |
||
87 | return ret; |
||
88 | } |
||
89 | if (cur->stop[1] == '.' && cur->stop[2] == '.' && |
||
90 | (!cur->stop[3] || cur->stop[3] == '/')) { |
||
91 | ret.kind = TK_DOTDOT; |
||
92 | ret.start = cur->stop + 1; |
||
93 | ret.stop = cur->stop + 2; |
||
94 | return ret; |
||
95 | } |
||
96 | unsigned i; |
||
97 | for (i = 1; cur->stop[i] && cur->stop[i] != '/'; i++) |
||
98 | ; |
||
99 | ret.kind = TK_COMP; |
||
100 | ret.start = &cur->stop[1]; |
||
101 | ret.stop = &cur->stop[i - 1]; |
||
102 | return ret; |
||
103 | } |
||
104 | |||
105 | /** States used by canonify(). */ |
||
106 | typedef enum { |
||
107 | S_INI, |
||
108 | S_A, |
||
109 | S_B, |
||
110 | S_C, |
||
111 | S_ACCEPT, |
||
112 | S_RESTART, |
||
113 | S_REJECT |
||
114 | } state_t; |
||
115 | |||
116 | typedef struct { |
||
117 | state_t s; |
||
118 | void (* f)(token_t *, token_t *, token_t *); |
||
119 | } change_state_t; |
||
120 | |||
121 | /* |
||
122 | * Actions that can be performed when transitioning from one |
||
123 | * state of canonify() to another. |
||
124 | */ |
||
125 | static void set_first_slash(token_t *t, token_t *tfsl, token_t *tlcomp) |
||
126 | { |
||
127 | *tfsl = *t; |
||
128 | } |
||
129 | static void save_component(token_t *t, token_t *tfsl, token_t *tlcomp) |
||
130 | { |
||
131 | *tlcomp = *t; |
||
132 | } |
||
133 | static void terminate_slash(token_t *t, token_t *tfsl, token_t *tlcomp) |
||
134 | { |
||
135 | if (tfsl->stop[1]) /* avoid writing to a well-formatted path */ |
||
136 | tfsl->stop[1] = '\0'; |
||
137 | } |
||
138 | static void remove_trailing_slash(token_t *t, token_t *tfsl, token_t *tlcomp) |
||
139 | { |
||
140 | t->start[-1] = '\0'; |
||
141 | } |
||
142 | /** Eat the extra '/'.. |
||
143 | * |
||
144 | * @param t The current TK_SLASH token. |
||
145 | */ |
||
146 | static void shift_slash(token_t *t, token_t *tfsl, token_t *tlcomp) |
||
147 | { |
||
148 | char *p = t->start; |
||
149 | char *q = t->stop + 1; |
||
150 | while ((*p++ = *q++)) |
||
151 | ; |
||
152 | } |
||
153 | /** Eat the extra '.'. |
||
154 | * |
||
155 | * @param t The current TK_DOT token. |
||
156 | */ |
||
157 | static void shift_dot(token_t *t, token_t *tfsl, token_t *tlcomp) |
||
158 | { |
||
159 | char *p = t->start; |
||
160 | char *q = t->stop + 1; |
||
161 | while ((*p++ = *q++)) |
||
162 | ; |
||
163 | } |
||
164 | /** Collapse the TK_COMP TK_SLASH TK_DOTDOT pattern. |
||
165 | * |
||
166 | * @param t The current TK_DOTDOT token. |
||
167 | * @param tlcomp The last TK_COMP token. |
||
168 | */ |
||
169 | static void shift_dotdot(token_t *t, token_t *tfsl, token_t *tlcomp) |
||
170 | { |
||
171 | char *p = tlcomp->start; |
||
172 | char *q = t->stop + 1; |
||
173 | while ((*p++ = *q++)) |
||
174 | ; |
||
175 | } |
||
176 | |||
177 | /** Transition function for canonify(). */ |
||
178 | static change_state_t trans[4][6] = { |
||
179 | [S_INI] = { |
||
180 | [TK_SLASH] = { |
||
181 | .s = S_A, |
||
182 | .f = set_first_slash, |
||
183 | }, |
||
184 | [TK_DOT] = { |
||
185 | .s = S_REJECT, |
||
186 | .f = NULL, |
||
187 | }, |
||
188 | [TK_DOTDOT] = { |
||
189 | .s = S_REJECT, |
||
190 | .f = NULL, |
||
191 | }, |
||
192 | [TK_COMP] = { |
||
193 | .s = S_REJECT, |
||
194 | .f = NULL, |
||
195 | }, |
||
196 | [TK_NUL] = { |
||
197 | .s = S_REJECT, |
||
198 | .f = NULL, |
||
199 | }, |
||
200 | [TK_INVALID] = { |
||
201 | .s = S_REJECT, |
||
202 | .f = NULL, |
||
203 | }, |
||
204 | }, |
||
205 | [S_A] = { |
||
206 | [TK_SLASH] = { |
||
207 | .s = S_A, |
||
208 | .f = set_first_slash, |
||
209 | }, |
||
210 | [TK_DOT] = { |
||
211 | .s = S_A, |
||
212 | .f = NULL, |
||
213 | }, |
||
214 | [TK_DOTDOT] = { |
||
215 | .s = S_A, |
||
216 | .f = NULL, |
||
217 | }, |
||
218 | [TK_COMP] = { |
||
219 | .s = S_B, |
||
220 | .f = save_component, |
||
221 | }, |
||
222 | [TK_NUL] = { |
||
223 | .s = S_ACCEPT, |
||
224 | .f = terminate_slash, |
||
225 | }, |
||
226 | [TK_INVALID] = { |
||
227 | .s = S_REJECT, |
||
228 | .f = NULL, |
||
229 | }, |
||
230 | }, |
||
231 | [S_B] = { |
||
232 | [TK_SLASH] = { |
||
233 | .s = S_C, |
||
234 | .f = NULL, |
||
235 | }, |
||
236 | [TK_DOT] = { |
||
237 | .s = S_REJECT, |
||
238 | .f = NULL, |
||
239 | }, |
||
240 | [TK_DOTDOT] = { |
||
241 | .s = S_REJECT, |
||
242 | .f = NULL, |
||
243 | }, |
||
244 | [TK_COMP] = { |
||
245 | .s = S_REJECT, |
||
246 | .f = NULL, |
||
247 | }, |
||
248 | [TK_NUL] = { |
||
249 | .s = S_ACCEPT, |
||
250 | .f = NULL, |
||
251 | }, |
||
252 | [TK_INVALID] = { |
||
253 | .s = S_REJECT, |
||
254 | .f = NULL, |
||
255 | }, |
||
256 | }, |
||
257 | [S_C] = { |
||
258 | [TK_SLASH] = { |
||
259 | .s = S_RESTART, |
||
260 | .f = shift_slash, |
||
261 | }, |
||
262 | [TK_DOT] = { |
||
263 | .s = S_RESTART, |
||
264 | .f = shift_dot, |
||
265 | }, |
||
266 | [TK_DOTDOT] = { |
||
267 | .s = S_RESTART, |
||
268 | .f = shift_dotdot, |
||
269 | }, |
||
270 | [TK_COMP] = { |
||
271 | .s = S_B, |
||
272 | .f = save_component, |
||
273 | }, |
||
274 | [TK_NUL] = { |
||
275 | .s = S_ACCEPT, |
||
276 | .f = remove_trailing_slash, |
||
277 | }, |
||
278 | [TK_INVALID] = { |
||
279 | .s = S_REJECT, |
||
280 | .f = NULL, |
||
281 | }, |
||
282 | } |
||
283 | }; |
||
284 | |||
285 | /** Canonify a file system path. |
||
286 | * |
||
287 | * A file system path is canonical, if the following holds: |
||
288 | * 1) the path is absolute (i.e. a/b/c is not canonical) |
||
289 | * 2) there is no trailing slash in the path (i.e. /a/b/c is not canonical) |
||
290 | * 3) there is no extra slash in the path (i.e. /a//b/c is not canonical) |
||
291 | * 4) there is no '.' component in the path (i.e. /a/./b/c is not canonical) |
||
292 | * 5) there is no '..' component in the path (i.e. /a/b/../c is not canonical) |
||
293 | * |
||
294 | * This function makes a potentially non-canonical file system path canonical. |
||
295 | * It works in-place and requires a NULL-terminated input string. |
||
296 | * |
||
297 | * @param path Path to be canonified. |
||
298 | * @param lenp Pointer where the length of the final path will be |
||
299 | * stored. Can be NULL. |
||
300 | * |
||
301 | * @return Canonified path or NULL on failure. |
||
302 | */ |
||
303 | char *canonify(char *path, size_t *lenp) |
||
304 | { |
||
305 | state_t state; |
||
306 | token_t t; |
||
307 | token_t tfsl; /* first slash */ |
||
308 | token_t tlcomp; /* last component */ |
||
309 | if (*path != '/') |
||
310 | return NULL; |
||
311 | tfsl = slash_token(path); |
||
312 | restart: |
||
313 | state = S_INI; |
||
314 | t = tfsl; |
||
315 | tlcomp = tfsl; |
||
316 | while (state != S_ACCEPT && state != S_RESTART && state != S_REJECT) { |
||
317 | if (trans[state][t.kind].f) |
||
318 | trans[state][t.kind].f(&t, &tfsl, &tlcomp); |
||
319 | state = trans[state][t.kind].s; |
||
320 | t = next_token(&t); |
||
321 | } |
||
322 | |||
323 | switch (state) { |
||
324 | case S_RESTART: |
||
325 | goto restart; |
||
326 | case S_REJECT: |
||
327 | return NULL; |
||
328 | case S_ACCEPT: |
||
329 | if (lenp) |
||
330 | *lenp = (size_t)((tlcomp.stop - tfsl.start) + 1); |
||
331 | return tfsl.start; |
||
332 | default: |
||
333 | abort(); |
||
334 | } |
||
335 | } |
||
336 | |||
337 | /** |
||
338 | * @} |
||
339 | */ |