Line | Branch | Exec | Source |
---|---|---|---|
1 | // | ||
2 | // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com) | ||
3 | // Copyright (c) 2022 Alan de Freitas (alandefreitas@gmail.com) | ||
4 | // | ||
5 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | ||
6 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | ||
7 | // | ||
8 | // Official repository: https://github.com/boostorg/url | ||
9 | // | ||
10 | |||
11 | #ifndef BOOST_URL_DETAIL_IMPL_NORMALIZE_IPP | ||
12 | #define BOOST_URL_DETAIL_IMPL_NORMALIZE_IPP | ||
13 | |||
14 | #include <boost/url/detail/config.hpp> | ||
15 | #include <boost/url/decode_view.hpp> | ||
16 | #include <boost/url/detail/decode.hpp> | ||
17 | #include <boost/url/segments_encoded_view.hpp> | ||
18 | #include <boost/url/detail/normalize.hpp> | ||
19 | #include <boost/url/grammar/ci_string.hpp> | ||
20 | #include <boost/assert.hpp> | ||
21 | #include <boost/core/ignore_unused.hpp> | ||
22 | #include <cstring> | ||
23 | |||
24 | namespace boost { | ||
25 | namespace urls { | ||
26 | namespace detail { | ||
27 | |||
28 | void | ||
29 | 5604 | pop_encoded_front( | |
30 | core::string_view& s, | ||
31 | char& c, | ||
32 | std::size_t& n) noexcept | ||
33 | { | ||
34 |
2/2✓ Branch 1 taken 5514 times.
✓ Branch 2 taken 90 times.
|
5604 | if(s.front() != '%') |
35 | { | ||
36 | 5514 | c = s.front(); | |
37 | 5514 | s.remove_prefix(1); | |
38 | } | ||
39 | else | ||
40 | { | ||
41 | 90 | detail::decode_unsafe( | |
42 | &c, | ||
43 | &c + 1, | ||
44 | s.substr(0, 3)); | ||
45 | 90 | s.remove_prefix(3); | |
46 | } | ||
47 | 5604 | ++n; | |
48 | 5604 | } | |
49 | |||
50 | int | ||
51 | 77 | compare_encoded( | |
52 | core::string_view lhs, | ||
53 | core::string_view rhs) noexcept | ||
54 | { | ||
55 | 77 | std::size_t n0 = 0; | |
56 | 77 | std::size_t n1 = 0; | |
57 | 77 | char c0 = 0; | |
58 | 77 | char c1 = 0; | |
59 | 205 | while( | |
60 |
4/4✓ Branch 1 taken 253 times.
✓ Branch 2 taken 29 times.
✓ Branch 3 taken 240 times.
✓ Branch 4 taken 42 times.
|
535 | !lhs.empty() && |
61 |
2/2✓ Branch 1 taken 240 times.
✓ Branch 2 taken 13 times.
|
253 | !rhs.empty()) |
62 | { | ||
63 | 240 | pop_encoded_front(lhs, c0, n0); | |
64 | 240 | pop_encoded_front(rhs, c1, n1); | |
65 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 220 times.
|
240 | if (c0 < c1) |
66 | 20 | return -1; | |
67 |
2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 205 times.
|
220 | if (c1 < c0) |
68 | 15 | return 1; | |
69 | } | ||
70 | 42 | n0 += detail::decode_bytes_unsafe(lhs); | |
71 | 42 | n1 += detail::decode_bytes_unsafe(rhs); | |
72 |
2/2✓ Branch 0 taken 21 times.
✓ Branch 1 taken 21 times.
|
42 | if (n0 == n1) |
73 | 21 | return 0; | |
74 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 13 times.
|
21 | if (n0 < n1) |
75 | 8 | return -1; | |
76 | 13 | return 1; | |
77 | } | ||
78 | |||
79 | void | ||
80 | 1104 | digest_encoded( | |
81 | core::string_view s, | ||
82 | fnv_1a& hasher) noexcept | ||
83 | { | ||
84 | 1104 | char c = 0; | |
85 | 1104 | std::size_t n = 0; | |
86 |
2/2✓ Branch 1 taken 452 times.
✓ Branch 2 taken 1104 times.
|
1556 | while(!s.empty()) |
87 | { | ||
88 | 452 | pop_encoded_front(s, c, n); | |
89 | 452 | hasher.put(c); | |
90 | } | ||
91 | 1104 | } | |
92 | |||
93 | int | ||
94 | 119 | ci_compare_encoded( | |
95 | core::string_view lhs, | ||
96 | core::string_view rhs) noexcept | ||
97 | { | ||
98 | 119 | std::size_t n0 = 0; | |
99 | 119 | std::size_t n1 = 0; | |
100 | 119 | char c0 = 0; | |
101 | 119 | char c1 = 0; | |
102 | 1505 | while ( | |
103 |
4/4✓ Branch 1 taken 1521 times.
✓ Branch 2 taken 103 times.
✓ Branch 3 taken 1515 times.
✓ Branch 4 taken 109 times.
|
3145 | !lhs.empty() && |
104 |
2/2✓ Branch 1 taken 1515 times.
✓ Branch 2 taken 6 times.
|
1521 | !rhs.empty()) |
105 | { | ||
106 | 1515 | pop_encoded_front(lhs, c0, n0); | |
107 | 1515 | pop_encoded_front(rhs, c1, n1); | |
108 | 1515 | c0 = grammar::to_lower(c0); | |
109 | 1515 | c1 = grammar::to_lower(c1); | |
110 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 1507 times.
|
1515 | if (c0 < c1) |
111 | 8 | return -1; | |
112 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1505 times.
|
1507 | if (c1 < c0) |
113 | 2 | return 1; | |
114 | } | ||
115 | 109 | n0 += detail::decode_bytes_unsafe(lhs); | |
116 | 109 | n1 += detail::decode_bytes_unsafe(rhs); | |
117 |
2/2✓ Branch 0 taken 102 times.
✓ Branch 1 taken 7 times.
|
109 | if (n0 == n1) |
118 | 102 | return 0; | |
119 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 6 times.
|
7 | if (n0 < n1) |
120 | 1 | return -1; | |
121 | 6 | return 1; | |
122 | } | ||
123 | |||
124 | void | ||
125 | 276 | ci_digest_encoded( | |
126 | core::string_view s, | ||
127 | fnv_1a& hasher) noexcept | ||
128 | { | ||
129 | 276 | char c = 0; | |
130 | 276 | std::size_t n = 0; | |
131 |
2/2✓ Branch 1 taken 1642 times.
✓ Branch 2 taken 276 times.
|
1918 | while(!s.empty()) |
132 | { | ||
133 | 1642 | pop_encoded_front(s, c, n); | |
134 | 1642 | c = grammar::to_lower(c); | |
135 | 1642 | hasher.put(c); | |
136 | } | ||
137 | 276 | } | |
138 | |||
139 | int | ||
140 | 8 | compare( | |
141 | core::string_view lhs, | ||
142 | core::string_view rhs) noexcept | ||
143 | { | ||
144 | 8 | auto rlen = (std::min)(lhs.size(), rhs.size()); | |
145 |
2/2✓ Branch 0 taken 17 times.
✓ Branch 1 taken 1 times.
|
18 | for (std::size_t i = 0; i < rlen; ++i) |
146 | { | ||
147 | 17 | char c0 = lhs[i]; | |
148 | 17 | char c1 = rhs[i]; | |
149 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 11 times.
|
17 | if (c0 < c1) |
150 | 6 | return -1; | |
151 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 10 times.
|
11 | if (c1 < c0) |
152 | 1 | return 1; | |
153 | } | ||
154 |
1/2✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
|
1 | if ( lhs.size() == rhs.size() ) |
155 | 1 | return 0; | |
156 | ✗ | if ( lhs.size() < rhs.size() ) | |
157 | ✗ | return -1; | |
158 | ✗ | return 1; | |
159 | } | ||
160 | |||
161 | int | ||
162 | 156 | ci_compare( | |
163 | core::string_view lhs, | ||
164 | core::string_view rhs) noexcept | ||
165 | { | ||
166 | 156 | auto rlen = (std::min)(lhs.size(), rhs.size()); | |
167 |
2/2✓ Branch 0 taken 640 times.
✓ Branch 1 taken 149 times.
|
789 | for (std::size_t i = 0; i < rlen; ++i) |
168 | { | ||
169 | 640 | char c0 = grammar::to_lower(lhs[i]); | |
170 | 640 | char c1 = grammar::to_lower(rhs[i]); | |
171 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 634 times.
|
640 | if (c0 < c1) |
172 | 6 | return -1; | |
173 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 633 times.
|
634 | if (c1 < c0) |
174 | 1 | return 1; | |
175 | } | ||
176 |
2/2✓ Branch 2 taken 142 times.
✓ Branch 3 taken 7 times.
|
149 | if ( lhs.size() == rhs.size() ) |
177 | 142 | return 0; | |
178 |
2/2✓ Branch 2 taken 6 times.
✓ Branch 3 taken 1 times.
|
7 | if ( lhs.size() < rhs.size() ) |
179 | 6 | return -1; | |
180 | 1 | return 1; | |
181 | } | ||
182 | |||
183 | void | ||
184 | 276 | ci_digest( | |
185 | core::string_view s, | ||
186 | fnv_1a& hasher) noexcept | ||
187 | { | ||
188 |
2/2✓ Branch 2 taken 590 times.
✓ Branch 3 taken 276 times.
|
866 | for (char c: s) |
189 | { | ||
190 | 590 | c = grammar::to_lower(c); | |
191 | 590 | hasher.put(c); | |
192 | } | ||
193 | 276 | } | |
194 | |||
195 | std::size_t | ||
196 | ✗ | path_starts_with( | |
197 | core::string_view lhs, | ||
198 | core::string_view rhs) noexcept | ||
199 | { | ||
200 | ✗ | auto consume_one = []( | |
201 | core::string_view::iterator& it, | ||
202 | char &c) | ||
203 | { | ||
204 | ✗ | if(*it != '%') | |
205 | { | ||
206 | ✗ | c = *it; | |
207 | ✗ | ++it; | |
208 | ✗ | return; | |
209 | } | ||
210 | ✗ | detail::decode_unsafe( | |
211 | &c, | ||
212 | &c + 1, | ||
213 | core::string_view(it, 3)); | ||
214 | ✗ | if (c != '/') | |
215 | { | ||
216 | ✗ | it += 3; | |
217 | ✗ | return; | |
218 | } | ||
219 | ✗ | c = *it; | |
220 | ✗ | ++it; | |
221 | }; | ||
222 | |||
223 | ✗ | auto it0 = lhs.begin(); | |
224 | ✗ | auto it1 = rhs.begin(); | |
225 | ✗ | auto end0 = lhs.end(); | |
226 | ✗ | auto end1 = rhs.end(); | |
227 | ✗ | char c0 = 0; | |
228 | ✗ | char c1 = 0; | |
229 | ✗ | while ( | |
230 | ✗ | it0 < end0 && | |
231 | ✗ | it1 < end1) | |
232 | { | ||
233 | ✗ | consume_one(it0, c0); | |
234 | ✗ | consume_one(it1, c1); | |
235 | ✗ | if (c0 != c1) | |
236 | ✗ | return 0; | |
237 | } | ||
238 | ✗ | if (it1 == end1) | |
239 | ✗ | return it0 - lhs.begin(); | |
240 | ✗ | return 0; | |
241 | } | ||
242 | |||
243 | std::size_t | ||
244 | 2104 | path_ends_with( | |
245 | core::string_view lhs, | ||
246 | core::string_view rhs) noexcept | ||
247 | { | ||
248 | 5800 | auto consume_last = []( | |
249 | core::string_view::iterator& it, | ||
250 | core::string_view::iterator& end, | ||
251 | char& c) | ||
252 | { | ||
253 |
4/4✓ Branch 0 taken 3932 times.
✓ Branch 1 taken 1868 times.
✓ Branch 2 taken 5768 times.
✓ Branch 3 taken 32 times.
|
9732 | if ((end - it) < 3 || |
254 |
2/2✓ Branch 1 taken 3900 times.
✓ Branch 2 taken 32 times.
|
3932 | *(std::prev(end, 3)) != '%') |
255 | { | ||
256 | 5768 | c = *--end; | |
257 | 5768 | return; | |
258 | } | ||
259 |
1/2✓ Branch 2 taken 32 times.
✗ Branch 3 not taken.
|
32 | detail::decode_unsafe( |
260 | &c, | ||
261 | &c + 1, | ||
262 | core::string_view(std::prev( | ||
263 | end, 3), 3)); | ||
264 |
1/2✓ Branch 0 taken 32 times.
✗ Branch 1 not taken.
|
32 | if (c != '/') |
265 | { | ||
266 | 32 | end -= 3; | |
267 | 32 | return; | |
268 | } | ||
269 | ✗ | c = *--end; | |
270 | }; | ||
271 | |||
272 | 2104 | auto it0 = lhs.begin(); | |
273 | 2104 | auto it1 = rhs.begin(); | |
274 | 2104 | auto end0 = lhs.end(); | |
275 | 2104 | auto end1 = rhs.end(); | |
276 | 2104 | char c0 = 0; | |
277 | 2104 | char c1 = 0; | |
278 | 1104 | while( | |
279 |
2/2✓ Branch 0 taken 2974 times.
✓ Branch 1 taken 234 times.
|
3208 | it0 < end0 && |
280 |
2/2✓ Branch 0 taken 2900 times.
✓ Branch 1 taken 74 times.
|
2974 | it1 < end1) |
281 | { | ||
282 | 2900 | consume_last(it0, end0, c0); | |
283 | 2900 | consume_last(it1, end1, c1); | |
284 |
2/2✓ Branch 0 taken 1796 times.
✓ Branch 1 taken 1104 times.
|
2900 | if (c0 != c1) |
285 | 1796 | return 0; | |
286 | } | ||
287 |
2/2✓ Branch 0 taken 110 times.
✓ Branch 1 taken 198 times.
|
308 | if (it1 == end1) |
288 | 110 | return lhs.end() - end0; | |
289 | 198 | return 0; | |
290 | } | ||
291 | |||
292 | std::size_t | ||
293 | 833 | remove_dot_segments( | |
294 | char* dest0, | ||
295 | char const* end, | ||
296 | core::string_view s) noexcept | ||
297 | { | ||
298 | // 1. The input buffer `s` is initialized with | ||
299 | // the now-appended path components and the | ||
300 | // output buffer `dest0` is initialized to | ||
301 | // the empty string. | ||
302 | 833 | char* dest = dest0; | |
303 | |||
304 | // Step 2 is a loop through 5 production rules: | ||
305 | // https://www.rfc-editor.org/rfc/rfc3986#section-5.2.4 | ||
306 | // | ||
307 | // There are no transitions between all rules, | ||
308 | // which enables some optimizations. | ||
309 | // | ||
310 | // Initial: | ||
311 | // - Rule A: handle initial dots | ||
312 | // If the input buffer begins with a | ||
313 | // prefix of "../" or "./", then remove | ||
314 | // that prefix from the input buffer. | ||
315 | // Rule A can only happen at the beginning. | ||
316 | // Errata 4547: Keep "../" in the beginning | ||
317 | // https://www.rfc-editor.org/errata/eid4547 | ||
318 | // | ||
319 | // Then: | ||
320 | // - Rule D: ignore a final ".." or "." | ||
321 | // if the input buffer consists only of "." | ||
322 | // or "..", then remove that from the input | ||
323 | // buffer. | ||
324 | // Rule D can only happen after Rule A because: | ||
325 | // - B and C write "/" to the input | ||
326 | // - E writes "/" to input or returns | ||
327 | // | ||
328 | // Then: | ||
329 | // - Rule B: ignore ".": write "/" to the input | ||
330 | // - Rule C: apply "..": remove seg and write "/" | ||
331 | // - Rule E: copy complete segment | ||
332 | auto append = | ||
333 | 1473 | [](char*& first, char const* last, core::string_view in) | |
334 | { | ||
335 | // append `in` to `dest` | ||
336 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1473 times.
|
1473 | BOOST_ASSERT(in.size() <= std::size_t(last - first)); |
337 | 1473 | std::memmove(first, in.data(), in.size()); | |
338 | 1473 | first += in.size(); | |
339 | ignore_unused(last); | ||
340 | 1473 | }; | |
341 | |||
342 | 9399 | auto dot_starts_with = []( | |
343 | core::string_view str, core::string_view dots, std::size_t& n) | ||
344 | { | ||
345 | // starts_with for encoded/decoded dots | ||
346 | // or decoded otherwise. return how many | ||
347 | // chars in str match the dots | ||
348 | 9399 | n = 0; | |
349 |
2/2✓ Branch 2 taken 15982 times.
✓ Branch 3 taken 495 times.
|
16477 | for (char c: dots) |
350 | { | ||
351 |
2/2✓ Branch 1 taken 1219 times.
✓ Branch 2 taken 14763 times.
|
15982 | if (str.empty()) |
352 | { | ||
353 | 1219 | n = 0; | |
354 | 1219 | return false; | |
355 | } | ||
356 |
2/2✓ Branch 1 taken 7074 times.
✓ Branch 2 taken 7689 times.
|
14763 | else if (str.starts_with(c)) |
357 | { | ||
358 | 7074 | str.remove_prefix(1); | |
359 | 7074 | ++n; | |
360 | } | ||
361 | 7689 | else if (str.size() > 2 && | |
362 |
2/2✓ Branch 1 taken 28 times.
✓ Branch 2 taken 6149 times.
|
6177 | str[0] == '%' && |
363 |
6/6✓ Branch 0 taken 6177 times.
✓ Branch 1 taken 1512 times.
✓ Branch 3 taken 20 times.
✓ Branch 4 taken 8 times.
✓ Branch 5 taken 4 times.
✓ Branch 6 taken 7685 times.
|
13886 | str[1] == '2' && |
364 |
2/2✓ Branch 1 taken 19 times.
✓ Branch 2 taken 1 times.
|
20 | (str[2] == 'e' || |
365 |
2/2✓ Branch 1 taken 3 times.
✓ Branch 2 taken 16 times.
|
19 | str[2] == 'E')) |
366 | { | ||
367 | 4 | str.remove_prefix(3); | |
368 | 4 | n += 3; | |
369 | } | ||
370 | else | ||
371 | { | ||
372 | 7685 | n = 0; | |
373 | 7685 | return false; | |
374 | } | ||
375 | } | ||
376 | 495 | return true; | |
377 | }; | ||
378 | |||
379 | 4677 | auto dot_equal = [&dot_starts_with]( | |
380 | 4677 | core::string_view str, core::string_view dots) | |
381 | { | ||
382 | 4677 | std::size_t n = 0; | |
383 | 4677 | dot_starts_with(str, dots, n); | |
384 | 4677 | return n == str.size(); | |
385 | 833 | }; | |
386 | |||
387 | // Rule A | ||
388 | std::size_t n; | ||
389 |
2/2✓ Branch 1 taken 775 times.
✓ Branch 2 taken 82 times.
|
857 | while (!s.empty()) |
390 | { | ||
391 |
2/2✓ Branch 2 taken 8 times.
✓ Branch 3 taken 767 times.
|
775 | if (dot_starts_with(s, "../", n)) |
392 | { | ||
393 | // Errata 4547 | ||
394 | 8 | append(dest, end, "../"); | |
395 | 8 | s.remove_prefix(n); | |
396 | 8 | continue; | |
397 | } | ||
398 |
2/2✓ Branch 2 taken 751 times.
✓ Branch 3 taken 16 times.
|
767 | else if (!dot_starts_with(s, "./", n)) |
399 | { | ||
400 | 751 | break; | |
401 | } | ||
402 | 16 | s.remove_prefix(n); | |
403 | } | ||
404 | |||
405 | // Rule D | ||
406 |
2/2✓ Branch 2 taken 84 times.
✓ Branch 3 taken 749 times.
|
833 | if( dot_equal(s, ".")) |
407 | { | ||
408 | 84 | s = {}; | |
409 | } | ||
410 |
2/2✓ Branch 2 taken 4 times.
✓ Branch 3 taken 745 times.
|
749 | else if( dot_equal(s, "..") ) |
411 | { | ||
412 | // Errata 4547 | ||
413 | 4 | append(dest, end, ".."); | |
414 | 4 | s = {}; | |
415 | } | ||
416 | |||
417 | // 2. While the input buffer is not empty, | ||
418 | // loop as follows: | ||
419 |
2/2✓ Branch 1 taken 1614 times.
✓ Branch 2 taken 797 times.
|
2411 | while (!s.empty()) |
420 | { | ||
421 | // Rule B | ||
422 |
2/2✓ Branch 2 taken 39 times.
✓ Branch 3 taken 1575 times.
|
1614 | if (dot_starts_with(s, "/./", n)) |
423 | { | ||
424 | 39 | s.remove_prefix(n - 1); | |
425 | 39 | continue; | |
426 | } | ||
427 |
2/2✓ Branch 2 taken 9 times.
✓ Branch 3 taken 1566 times.
|
1575 | if (dot_equal(s, "/.")) |
428 | { | ||
429 | // We can't remove "." from a core::string_view | ||
430 | // So what we do here is equivalent to | ||
431 | // replacing s with '/' as required | ||
432 | // in Rule B and executing the next | ||
433 | // iteration, which would append this | ||
434 | // '/' to the output, as required by | ||
435 | // Rule E | ||
436 | 9 | append(dest, end, s.substr(0, 1)); | |
437 | 9 | s = {}; | |
438 | 9 | break; | |
439 | } | ||
440 | |||
441 | // Rule C | ||
442 |
2/2✓ Branch 2 taken 165 times.
✓ Branch 3 taken 1401 times.
|
1566 | if (dot_starts_with(s, "/../", n)) |
443 | { | ||
444 | 330 | std::size_t p = core::string_view( | |
445 | 165 | dest0, dest - dest0).find_last_of('/'); | |
446 |
2/2✓ Branch 0 taken 119 times.
✓ Branch 1 taken 46 times.
|
165 | if (p != core::string_view::npos) |
447 | { | ||
448 | // output has multiple segments | ||
449 | // "erase" [p, end] if not "/.." | ||
450 | 119 | core::string_view last_seg(dest0 + p, dest - (dest0 + p)); | |
451 |
2/2✓ Branch 2 taken 110 times.
✓ Branch 3 taken 9 times.
|
119 | if (!dot_equal(last_seg, "/..")) |
452 | 110 | dest = dest0 + p; | |
453 | else | ||
454 | 9 | append(dest, end, "/.."); | |
455 | } | ||
456 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 44 times.
|
46 | else if (dest0 != dest) |
457 | { | ||
458 | // one segment in the output | ||
459 | 2 | dest = dest0; | |
460 | 2 | s.remove_prefix(1); | |
461 | } | ||
462 | else | ||
463 | { | ||
464 | // output is empty | ||
465 | 44 | append(dest, end, "/.."); | |
466 | } | ||
467 | 165 | s.remove_prefix(n-1); | |
468 | 165 | continue; | |
469 | } | ||
470 |
2/2✓ Branch 2 taken 27 times.
✓ Branch 3 taken 1374 times.
|
1401 | if (dot_equal(s, "/..")) |
471 | { | ||
472 | 54 | std::size_t p = core::string_view( | |
473 | 27 | dest0, dest - dest0).find_last_of('/'); | |
474 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 11 times.
|
27 | if (p != core::string_view::npos) |
475 | { | ||
476 | // erase [p, end] | ||
477 | 16 | dest = dest0 + p; | |
478 | 16 | append(dest, end, "/"); | |
479 | } | ||
480 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 9 times.
|
11 | else if (dest0 != dest) |
481 | { | ||
482 | 2 | dest = dest0; | |
483 | } | ||
484 | else | ||
485 | { | ||
486 | 9 | append(dest, end, "/.."); | |
487 | } | ||
488 | 27 | s = {}; | |
489 | 27 | break; | |
490 | } | ||
491 | |||
492 | // Rule E | ||
493 | 1374 | std::size_t p = s.find_first_of('/', 1); | |
494 |
2/2✓ Branch 0 taken 665 times.
✓ Branch 1 taken 709 times.
|
1374 | if (p != core::string_view::npos) |
495 | { | ||
496 | 665 | append(dest, end, s.substr(0, p)); | |
497 | 665 | s.remove_prefix(p); | |
498 | } | ||
499 | else | ||
500 | { | ||
501 | 709 | append(dest, end, s); | |
502 | 709 | s = {}; | |
503 | } | ||
504 | } | ||
505 | |||
506 | // 3. Finally, the output buffer is set | ||
507 | // as the result of remove_dot_segments, | ||
508 | // and we return its size | ||
509 | 833 | return dest - dest0; | |
510 | } | ||
511 | |||
512 | char | ||
513 | 1138 | path_pop_back( core::string_view& s ) | |
514 | { | ||
515 |
4/4✓ Branch 1 taken 518 times.
✓ Branch 2 taken 620 times.
✓ Branch 3 taken 1090 times.
✓ Branch 4 taken 48 times.
|
1656 | if (s.size() < 3 || |
516 |
3/4✓ Branch 2 taken 518 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 470 times.
✓ Branch 5 taken 48 times.
|
518 | *std::prev(s.end(), 3) != '%') |
517 | { | ||
518 | 1090 | char c = s.back(); | |
519 | 1090 | s.remove_suffix(1); | |
520 | 1090 | return c; | |
521 | } | ||
522 | 48 | char c = 0; | |
523 |
1/2✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
|
96 | detail::decode_unsafe( |
524 | 96 | &c, &c + 1, s.substr(s.size() - 3)); | |
525 |
2/2✓ Branch 0 taken 44 times.
✓ Branch 1 taken 4 times.
|
48 | if (c != '/') |
526 | { | ||
527 | 44 | s.remove_suffix(3); | |
528 | 44 | return c; | |
529 | } | ||
530 | 4 | c = s.back(); | |
531 | 4 | s.remove_suffix(1); | |
532 | 4 | return c; | |
533 | }; | ||
534 | |||
535 | void | ||
536 | 506 | pop_last_segment( | |
537 | core::string_view& s, | ||
538 | core::string_view& c, | ||
539 | std::size_t& level, | ||
540 | bool r) noexcept | ||
541 | { | ||
542 | 506 | c = {}; | |
543 | 506 | std::size_t n = 0; | |
544 |
2/2✓ Branch 1 taken 550 times.
✓ Branch 2 taken 118 times.
|
668 | while (!s.empty()) |
545 | { | ||
546 | // B. if the input buffer begins with a | ||
547 | // prefix of "/./" or "/.", where "." is | ||
548 | // a complete path segment, then replace | ||
549 | // that prefix with "/" in the input | ||
550 | // buffer; otherwise, | ||
551 | 550 | n = detail::path_ends_with(s, "/./"); | |
552 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 540 times.
|
550 | if (n) |
553 | { | ||
554 | 10 | c = s.substr(s.size() - n); | |
555 | 10 | s.remove_suffix(n); | |
556 | 10 | continue; | |
557 | } | ||
558 | 540 | n = detail::path_ends_with(s, "/."); | |
559 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 528 times.
|
540 | if (n) |
560 | { | ||
561 | 12 | c = s.substr(s.size() - n, 1); | |
562 | 12 | s.remove_suffix(n); | |
563 | 12 | continue; | |
564 | } | ||
565 | |||
566 | // C. if the input buffer begins with a | ||
567 | // prefix of "/../" or "/..", where ".." | ||
568 | // is a complete path segment, then | ||
569 | // replace that prefix with "/" in the | ||
570 | // input buffer and remove the last | ||
571 | // segment and its preceding "/" | ||
572 | // (if any) from the output buffer | ||
573 | // otherwise, | ||
574 | 528 | n = detail::path_ends_with(s, "/../"); | |
575 |
2/2✓ Branch 0 taken 42 times.
✓ Branch 1 taken 486 times.
|
528 | if (n) |
576 | { | ||
577 | 42 | c = s.substr(s.size() - n); | |
578 | 42 | s.remove_suffix(n); | |
579 | 42 | ++level; | |
580 | 42 | continue; | |
581 | } | ||
582 | 486 | n = detail::path_ends_with(s, "/.."); | |
583 |
2/2✓ Branch 0 taken 46 times.
✓ Branch 1 taken 440 times.
|
486 | if (n) |
584 | { | ||
585 | 46 | c = s.substr(s.size() - n); | |
586 | 46 | s.remove_suffix(n); | |
587 | 46 | ++level; | |
588 | 46 | continue; | |
589 | } | ||
590 | |||
591 | // E. move the first path segment in the | ||
592 | // input buffer to the end of the output | ||
593 | // buffer, including the initial "/" | ||
594 | // character (if any) and any subsequent | ||
595 | // characters up to, but not including, | ||
596 | // the next "/" character or the end of | ||
597 | // the input buffer. | ||
598 | 440 | std::size_t p = s.size() > 1 | |
599 |
2/2✓ Branch 0 taken 370 times.
✓ Branch 1 taken 70 times.
|
440 | ? s.find_last_of('/', s.size() - 2) |
600 | 440 | : core::string_view::npos; | |
601 |
2/2✓ Branch 0 taken 272 times.
✓ Branch 1 taken 168 times.
|
440 | if (p != core::string_view::npos) |
602 | { | ||
603 | 272 | c = s.substr(p + 1); | |
604 | 272 | s.remove_suffix(c.size()); | |
605 | } | ||
606 | else | ||
607 | { | ||
608 | 168 | c = s; | |
609 | 168 | s = {}; | |
610 | } | ||
611 | |||
612 |
2/2✓ Branch 0 taken 388 times.
✓ Branch 1 taken 52 times.
|
440 | if (level == 0) |
613 | 388 | return; | |
614 |
2/2✓ Branch 1 taken 42 times.
✓ Branch 2 taken 10 times.
|
52 | if (!s.empty()) |
615 | 42 | --level; | |
616 | } | ||
617 | // we still need to skip n_skip + 1 | ||
618 | // but the string is empty | ||
619 |
4/4✓ Branch 0 taken 42 times.
✓ Branch 1 taken 76 times.
✓ Branch 2 taken 34 times.
✓ Branch 3 taken 8 times.
|
118 | if (r && level) |
620 | { | ||
621 | 34 | c = "/"; | |
622 | 34 | level = 0; | |
623 | 34 | return; | |
624 | } | ||
625 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 80 times.
|
84 | else if (level) |
626 | { | ||
627 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 4 times.
|
4 | if (c.empty()) |
628 | ✗ | c = "/.."; | |
629 | else | ||
630 | 4 | c = "/../"; | |
631 | 4 | --level; | |
632 | 4 | return; | |
633 | } | ||
634 | 80 | c = {}; | |
635 | } | ||
636 | |||
637 | void | ||
638 | 276 | normalized_path_digest( | |
639 | core::string_view s, | ||
640 | bool remove_unmatched, | ||
641 | fnv_1a& hasher) noexcept | ||
642 | { | ||
643 | 276 | core::string_view child; | |
644 | 276 | std::size_t level = 0; | |
645 | 230 | do | |
646 | { | ||
647 | 506 | pop_last_segment( | |
648 | s, child, level, remove_unmatched); | ||
649 |
2/2✓ Branch 1 taken 1138 times.
✓ Branch 2 taken 506 times.
|
1644 | while (!child.empty()) |
650 | { | ||
651 | 1138 | char c = path_pop_back(child); | |
652 | 1138 | hasher.put(c); | |
653 | } | ||
654 | } | ||
655 |
2/2✓ Branch 1 taken 230 times.
✓ Branch 2 taken 276 times.
|
506 | while (!s.empty()); |
656 | 276 | } | |
657 | |||
658 | // compare segments as if there were a normalized | ||
659 | int | ||
660 | 168 | segments_compare( | |
661 | segments_encoded_view seg0, | ||
662 | segments_encoded_view seg1) noexcept | ||
663 | { | ||
664 | // calculate path size as if it were normalized | ||
665 | auto normalized_size = | ||
666 | 336 | [](segments_encoded_view seg) -> std::size_t | |
667 | { | ||
668 |
2/2✓ Branch 1 taken 102 times.
✓ Branch 2 taken 234 times.
|
336 | if (seg.empty()) |
669 | 102 | return seg.is_absolute(); | |
670 | |||
671 | 234 | std::size_t n = 0; | |
672 | 234 | std::size_t skip = 0; | |
673 | 234 | auto begin = seg.begin(); | |
674 | 234 | auto it = seg.end(); | |
675 |
2/2✓ Branch 1 taken 658 times.
✓ Branch 2 taken 234 times.
|
892 | while (it != begin) |
676 | { | ||
677 | 658 | --it; | |
678 | 658 | decode_view dseg = **it; | |
679 |
2/2✓ Branch 1 taken 167 times.
✓ Branch 2 taken 491 times.
|
658 | if (dseg == "..") |
680 | 167 | ++skip; | |
681 |
2/2✓ Branch 1 taken 453 times.
✓ Branch 2 taken 38 times.
|
491 | else if (dseg != ".") |
682 | { | ||
683 |
2/2✓ Branch 0 taken 85 times.
✓ Branch 1 taken 368 times.
|
453 | if (skip) |
684 | 85 | --skip; | |
685 | else | ||
686 | 368 | n += dseg.size() + 1; | |
687 | } | ||
688 | } | ||
689 | 234 | n += skip * 3; | |
690 | 234 | n -= !seg.is_absolute(); | |
691 | 234 | return n; | |
692 | }; | ||
693 | |||
694 | // find the normalized size for the comparison | ||
695 | 168 | std::size_t n0 = normalized_size(seg0); | |
696 | 168 | std::size_t n1 = normalized_size(seg1); | |
697 | 168 | std::size_t n00 = n0; | |
698 | 168 | std::size_t n10 = n1; | |
699 | |||
700 | // consume the last char from a segment range | ||
701 | auto consume_last = | ||
702 | 1632 | []( | |
703 | std::size_t& n, | ||
704 | decode_view& dseg, | ||
705 | segments_encoded_view::iterator& begin, | ||
706 | segments_encoded_view::iterator& it, | ||
707 | decode_view::iterator& cit, | ||
708 | std::size_t& skip, | ||
709 | bool& at_slash) -> char | ||
710 | { | ||
711 |
2/2✓ Branch 2 taken 1005 times.
✓ Branch 3 taken 627 times.
|
1632 | if (cit != dseg.begin()) |
712 | { | ||
713 | // return last char from current segment | ||
714 | 1005 | at_slash = false; | |
715 | 1005 | --cit; | |
716 | 1005 | --n; | |
717 | 1005 | return *cit; | |
718 | } | ||
719 | |||
720 |
2/2✓ Branch 0 taken 367 times.
✓ Branch 1 taken 260 times.
|
627 | if (!at_slash) |
721 | { | ||
722 | // current segment dseg is over and | ||
723 | // previous char was not a slash | ||
724 | // so we output one | ||
725 | 367 | at_slash = true; | |
726 | 367 | --n; | |
727 | 367 | return '/'; | |
728 | } | ||
729 | |||
730 | // current segment dseg is over and | ||
731 | // last char was already the slash | ||
732 | // between segments, so take the | ||
733 | // next final segment to consume | ||
734 | 260 | at_slash = false; | |
735 |
1/2✓ Branch 2 taken 498 times.
✗ Branch 3 not taken.
|
498 | while (cit == dseg.begin()) |
736 | { | ||
737 | // take next segment | ||
738 |
2/2✓ Branch 1 taken 376 times.
✓ Branch 2 taken 122 times.
|
498 | if (it != begin) |
739 | 376 | --it; | |
740 | else | ||
741 | 122 | break; | |
742 |
2/2✓ Branch 3 taken 140 times.
✓ Branch 4 taken 236 times.
|
376 | if (**it == "..") |
743 | { | ||
744 | // skip next if this is ".." | ||
745 | 140 | ++skip; | |
746 | } | ||
747 |
2/2✓ Branch 3 taken 208 times.
✓ Branch 4 taken 28 times.
|
236 | else if (**it != ".") |
748 | { | ||
749 |
2/2✓ Branch 0 taken 70 times.
✓ Branch 1 taken 138 times.
|
208 | if (skip) |
750 | { | ||
751 | // discount skips | ||
752 | 70 | --skip; | |
753 | } | ||
754 | else | ||
755 | { | ||
756 | // or update current seg | ||
757 | 138 | dseg = **it; | |
758 | 138 | cit = dseg.end(); | |
759 | 138 | break; | |
760 | } | ||
761 | } | ||
762 | } | ||
763 | // consume from the new current | ||
764 | // segment | ||
765 | 260 | --n; | |
766 |
2/2✓ Branch 2 taken 123 times.
✓ Branch 3 taken 137 times.
|
260 | if (cit != dseg.begin()) |
767 | { | ||
768 | // in the general case, we consume | ||
769 | // one more character from the end | ||
770 | 123 | --cit; | |
771 | 123 | return *cit; | |
772 | } | ||
773 | |||
774 | // nothing left to consume in the | ||
775 | // current and new segment | ||
776 |
2/2✓ Branch 1 taken 128 times.
✓ Branch 2 taken 9 times.
|
137 | if (it == begin) |
777 | { | ||
778 | // if this is the first | ||
779 | // segment, the segments are | ||
780 | // over and there can only | ||
781 | // be repetitions of "../" to | ||
782 | // output | ||
783 | 128 | return "/.."[n % 3]; | |
784 | } | ||
785 | // at other segments, we need | ||
786 | // a slash to transition to the | ||
787 | // next segment | ||
788 | 9 | at_slash = true; | |
789 | 9 | return '/'; | |
790 | }; | ||
791 | |||
792 | // consume final segments from seg0 that | ||
793 | // should not influence the comparison | ||
794 | 168 | auto begin0 = seg0.begin(); | |
795 | 168 | auto it0 = seg0.end(); | |
796 | 168 | decode_view dseg0; | |
797 |
2/2✓ Branch 2 taken 117 times.
✓ Branch 3 taken 51 times.
|
168 | if (it0 != seg0.begin()) |
798 | { | ||
799 | 117 | --it0; | |
800 | 117 | dseg0 = **it0; | |
801 | } | ||
802 | 168 | decode_view::iterator cit0 = dseg0.end(); | |
803 | 168 | std::size_t skip0 = 0; | |
804 | 168 | bool at_slash0 = true; | |
805 |
2/2✓ Branch 0 taken 134 times.
✓ Branch 1 taken 168 times.
|
302 | while (n0 > n1) |
806 | { | ||
807 | 134 | consume_last(n0, dseg0, begin0, it0, cit0, skip0, at_slash0); | |
808 | } | ||
809 | |||
810 | // consume final segments from seg1 that | ||
811 | // should not influence the comparison | ||
812 | 168 | auto begin1 = seg1.begin(); | |
813 | 168 | auto it1 = seg1.end(); | |
814 | 168 | decode_view dseg1; | |
815 |
2/2✓ Branch 2 taken 117 times.
✓ Branch 3 taken 51 times.
|
168 | if (it1 != seg1.begin()) |
816 | { | ||
817 | 117 | --it1; | |
818 | 117 | dseg1 = **it1; | |
819 | } | ||
820 | 168 | decode_view::iterator cit1 = dseg1.end(); | |
821 | 168 | std::size_t skip1 = 0; | |
822 | 168 | bool at_slash1 = true; | |
823 |
2/2✓ Branch 0 taken 34 times.
✓ Branch 1 taken 168 times.
|
202 | while (n1 > n0) |
824 | { | ||
825 | 34 | consume_last(n1, dseg1, begin1, it1, cit1, skip1, at_slash1); | |
826 | } | ||
827 | |||
828 | 168 | int cmp = 0; | |
829 |
2/2✓ Branch 0 taken 732 times.
✓ Branch 1 taken 168 times.
|
900 | while (n0) |
830 | { | ||
831 | 732 | char c0 = consume_last( | |
832 | n0, dseg0, begin0, it0, cit0, skip0, at_slash0); | ||
833 | 732 | char c1 = consume_last( | |
834 | n1, dseg1, begin1, it1, cit1, skip1, at_slash1); | ||
835 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 696 times.
|
732 | if (c0 < c1) |
836 | 36 | cmp = -1; | |
837 |
2/2✓ Branch 0 taken 41 times.
✓ Branch 1 taken 655 times.
|
696 | else if (c1 < c0) |
838 | 41 | cmp = +1; | |
839 | } | ||
840 | |||
841 |
2/2✓ Branch 0 taken 41 times.
✓ Branch 1 taken 127 times.
|
168 | if (cmp != 0) |
842 | 41 | return cmp; | |
843 |
2/2✓ Branch 0 taken 125 times.
✓ Branch 1 taken 2 times.
|
127 | if ( n00 == n10 ) |
844 | 125 | return 0; | |
845 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | if ( n00 < n10 ) |
846 | 1 | return -1; | |
847 | 1 | return 1; | |
848 | } | ||
849 | |||
850 | } // detail | ||
851 | } // urls | ||
852 | } // boost | ||
853 | |||
854 | #endif | ||
855 |