1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/files/file_path.h"
6
7 #include <string.h>
8
9 #include <algorithm>
10 #include <iterator>
11 #include <string>
12 #include <string_view>
13
14 #include "base/logging.h"
15 #include "base/strings/string_util.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "util/build_config.h"
18
19 #if defined(OS_MACOSX)
20 #include "base/mac/scoped_cftyperef.h"
21 #include "base/third_party/icu/icu_utf.h"
22 #endif
23
24 #if defined(OS_WIN)
25 #include <windows.h>
26 #elif defined(OS_MACOSX)
27 #include <CoreFoundation/CoreFoundation.h>
28 #endif
29
30 namespace base {
31
32 using StringType = FilePath::StringType;
33 using StringViewType = FilePath::StringViewType;
34
35 namespace {
36
// Final extension components (compared in lower case, without the leading
// dot) that ExtensionSeparatorPosition() accepts as the tail of a double
// extension, e.g. the "gz" of "foo.tar.gz".
constexpr const char* kCommonDoubleExtensionSuffixes[] = {"gz", "z", "bz2",
                                                          "bz"};

// Complete double extensions (compared in lower case, without the leading
// dot) that ExtensionSeparatorPosition() always reports as one extension.
constexpr const char* kCommonDoubleExtensions[] = {"user.js"};
39
40 const FilePath::CharType kStringTerminator = FILE_PATH_LITERAL('\0');
41
42 // If this FilePath contains a drive letter specification, returns the
43 // position of the last character of the drive letter specification,
44 // otherwise returns npos. This can only be true on Windows, when a pathname
45 // begins with a letter followed by a colon. On other platforms, this always
46 // returns npos.
FindDriveLetter(StringViewType path)47 StringViewType::size_type FindDriveLetter(StringViewType path) {
48 #if defined(FILE_PATH_USES_DRIVE_LETTERS)
49 // This is dependent on an ASCII-based character set, but that's a
50 // reasonable assumption. iswalpha can be too inclusive here.
51 if (path.length() >= 2 && path[1] == L':' &&
52 ((path[0] >= L'A' && path[0] <= L'Z') ||
53 (path[0] >= L'a' && path[0] <= L'z'))) {
54 return 1;
55 }
56 #endif // FILE_PATH_USES_DRIVE_LETTERS
57 return StringType::npos;
58 }
59
60 #if defined(FILE_PATH_USES_DRIVE_LETTERS)
EqualDriveLetterCaseInsensitive(StringViewType a, StringViewType b)61 bool EqualDriveLetterCaseInsensitive(StringViewType a, StringViewType b) {
62 size_t a_letter_pos = FindDriveLetter(a);
63 size_t b_letter_pos = FindDriveLetter(b);
64
65 if (a_letter_pos == StringType::npos || b_letter_pos == StringType::npos)
66 return a == b;
67
68 StringViewType a_letter(a.substr(0, a_letter_pos + 1));
69 StringViewType b_letter(b.substr(0, b_letter_pos + 1));
70 if (!StartsWithCaseInsensitiveASCII(a_letter, b_letter))
71 return false;
72
73 StringViewType a_rest(a.substr(a_letter_pos + 1));
74 StringViewType b_rest(b.substr(b_letter_pos + 1));
75 return a_rest == b_rest;
76 }
77 #endif // defined(FILE_PATH_USES_DRIVE_LETTERS)
78
IsPathAbsolute(StringViewType path)79 bool IsPathAbsolute(StringViewType path) {
80 #if defined(FILE_PATH_USES_DRIVE_LETTERS)
81 StringType::size_type letter = FindDriveLetter(path);
82 if (letter != StringType::npos) {
83 // Look for a separator right after the drive specification.
84 return path.length() > letter + 1 &&
85 FilePath::IsSeparator(path[letter + 1]);
86 }
87 // Look for a pair of leading separators.
88 return path.length() > 1 && FilePath::IsSeparator(path[0]) &&
89 FilePath::IsSeparator(path[1]);
90 #else // FILE_PATH_USES_DRIVE_LETTERS
91 // Look for a separator in the first position.
92 return path.length() > 0 && FilePath::IsSeparator(path[0]);
93 #endif // FILE_PATH_USES_DRIVE_LETTERS
94 }
95
AreAllSeparators(const StringType& input)96 bool AreAllSeparators(const StringType& input) {
97 for (StringType::const_iterator it = input.begin(); it != input.end(); ++it) {
98 if (!FilePath::IsSeparator(*it))
99 return false;
100 }
101
102 return true;
103 }
104
105 // Find the position of the '.' that separates the extension from the rest
106 // of the file name. The position is relative to BaseName(), not value().
107 // Returns npos if it can't find an extension.
FinalExtensionSeparatorPosition(const StringType& path)108 StringType::size_type FinalExtensionSeparatorPosition(const StringType& path) {
109 // Special case "." and ".."
110 if (path == FilePath::kCurrentDirectory || path == FilePath::kParentDirectory)
111 return StringType::npos;
112
113 return path.rfind(FilePath::kExtensionSeparator);
114 }
115
116 // Same as above, but allow a second extension component of up to 4
117 // characters when the rightmost extension component is a common double
118 // extension (gz, bz2, Z). For example, foo.tar.gz or foo.tar.Z would have
119 // extension components of '.tar.gz' and '.tar.Z' respectively.
ExtensionSeparatorPosition(const StringType& path)120 StringType::size_type ExtensionSeparatorPosition(const StringType& path) {
121 const StringType::size_type last_dot = FinalExtensionSeparatorPosition(path);
122
123 // No extension, or the extension is the whole filename.
124 if (last_dot == StringType::npos || last_dot == 0U)
125 return last_dot;
126
127 const StringType::size_type penultimate_dot =
128 path.rfind(FilePath::kExtensionSeparator, last_dot - 1);
129 const StringType::size_type last_separator = path.find_last_of(
130 FilePath::kSeparators, last_dot - 1, FilePath::kSeparatorsLength - 1);
131
132 if (penultimate_dot == StringType::npos ||
133 (last_separator != StringType::npos &&
134 penultimate_dot < last_separator)) {
135 return last_dot;
136 }
137
138 for (size_t i = 0; i < std::size(kCommonDoubleExtensions); ++i) {
139 StringType extension(path, penultimate_dot + 1);
140 if (LowerCaseEqualsASCII(extension, kCommonDoubleExtensions[i]))
141 return penultimate_dot;
142 }
143
144 StringType extension(path, last_dot + 1);
145 for (size_t i = 0; i < std::size(kCommonDoubleExtensionSuffixes); ++i) {
146 if (LowerCaseEqualsASCII(extension, kCommonDoubleExtensionSuffixes[i])) {
147 if ((last_dot - penultimate_dot) <= 5U &&
148 (last_dot - penultimate_dot) > 1U) {
149 return penultimate_dot;
150 }
151 }
152 }
153
154 return last_dot;
155 }
156
157 // Returns true if path is "", ".", or "..".
IsEmptyOrSpecialCase(const StringType& path)158 bool IsEmptyOrSpecialCase(const StringType& path) {
159 // Special cases "", ".", and ".."
160 if (path.empty() || path == FilePath::kCurrentDirectory ||
161 path == FilePath::kParentDirectory) {
162 return true;
163 }
164
165 return false;
166 }
167
168 } // namespace
169
// Default construction uses the compiler-generated implementation.
FilePath::FilePath() = default;

// Copy and move construction are compiler-generated as well; the move
// constructor is declared noexcept.
FilePath::FilePath(const FilePath& that) = default;
FilePath::FilePath(FilePath&& that) noexcept = default;
174
FilePath(StringViewType path)175 FilePath::FilePath(StringViewType path) {
176 path_.assign(path);
177 StringType::size_type nul_pos = path_.find(kStringTerminator);
178 if (nul_pos != StringType::npos)
179 path_.erase(nul_pos, StringType::npos);
180 }
181
// Destruction uses the compiler-generated implementation.
FilePath::~FilePath() = default;

// Copy assignment is compiler-generated.
FilePath& FilePath::operator=(const FilePath& that) = default;

// Move assignment is compiler-generated.
FilePath& FilePath::operator=(FilePath&& that) = default;
187
operator ==(const FilePath& that) const188 bool FilePath::operator==(const FilePath& that) const {
189 #if defined(FILE_PATH_USES_DRIVE_LETTERS)
190 return EqualDriveLetterCaseInsensitive(this->path_, that.path_);
191 #else // defined(FILE_PATH_USES_DRIVE_LETTERS)
192 return path_ == that.path_;
193 #endif // defined(FILE_PATH_USES_DRIVE_LETTERS)
194 }
195
operator !=(const FilePath& that) const196 bool FilePath::operator!=(const FilePath& that) const {
197 #if defined(FILE_PATH_USES_DRIVE_LETTERS)
198 return !EqualDriveLetterCaseInsensitive(this->path_, that.path_);
199 #else // defined(FILE_PATH_USES_DRIVE_LETTERS)
200 return path_ != that.path_;
201 #endif // defined(FILE_PATH_USES_DRIVE_LETTERS)
202 }
203
204 // static
IsSeparator(CharType character)205 bool FilePath::IsSeparator(CharType character) {
206 for (size_t i = 0; i < kSeparatorsLength - 1; ++i) {
207 if (character == kSeparators[i]) {
208 return true;
209 }
210 }
211
212 return false;
213 }
214
GetComponents(std::vector<StringType>* components) const215 void FilePath::GetComponents(std::vector<StringType>* components) const {
216 DCHECK(components);
217 if (!components)
218 return;
219 components->clear();
220 if (value().empty())
221 return;
222
223 std::vector<StringType> ret_val;
224 FilePath current = *this;
225 FilePath base;
226
227 // Capture path components.
228 while (current != current.DirName()) {
229 base = current.BaseName();
230 if (!AreAllSeparators(base.value()))
231 ret_val.push_back(base.value());
232 current = current.DirName();
233 }
234
235 // Capture root, if any.
236 base = current.BaseName();
237 if (!base.value().empty() && base.value() != kCurrentDirectory)
238 ret_val.push_back(current.BaseName().value());
239
240 // Capture drive letter, if any.
241 FilePath dir = current.DirName();
242 StringType::size_type letter = FindDriveLetter(dir.value());
243 if (letter != StringType::npos) {
244 ret_val.push_back(StringType(dir.value(), 0, letter + 1));
245 }
246
247 *components = std::vector<StringType>(ret_val.rbegin(), ret_val.rend());
248 }
249
// Returns true if this path is a strict ancestor of |child|. The decision is
// delegated to AppendRelativePath(), called without an output path.
bool FilePath::IsParent(const FilePath& child) const {
  return AppendRelativePath(child, nullptr);
}
253
AppendRelativePath(const FilePath& child, FilePath* path) const254 bool FilePath::AppendRelativePath(const FilePath& child, FilePath* path) const {
255 std::vector<StringType> parent_components;
256 std::vector<StringType> child_components;
257 GetComponents(&parent_components);
258 child.GetComponents(&child_components);
259
260 if (parent_components.empty() ||
261 parent_components.size() >= child_components.size())
262 return false;
263
264 std::vector<StringType>::const_iterator parent_comp =
265 parent_components.begin();
266 std::vector<StringType>::const_iterator child_comp = child_components.begin();
267
268 #if defined(FILE_PATH_USES_DRIVE_LETTERS)
269 // Windows can access case sensitive filesystems, so component
270 // comparisons must be case sensitive, but drive letters are
271 // never case sensitive.
272 if ((FindDriveLetter(*parent_comp) != StringType::npos) &&
273 (FindDriveLetter(*child_comp) != StringType::npos)) {
274 if (!StartsWithCaseInsensitiveASCII(*parent_comp, *child_comp))
275 return false;
276 ++parent_comp;
277 ++child_comp;
278 }
279 #endif // defined(FILE_PATH_USES_DRIVE_LETTERS)
280
281 while (parent_comp != parent_components.end()) {
282 if (*parent_comp != *child_comp)
283 return false;
284 ++parent_comp;
285 ++child_comp;
286 }
287
288 if (path != nullptr) {
289 for (; child_comp != child_components.end(); ++child_comp) {
290 *path = path->Append(*child_comp);
291 }
292 }
293 return true;
294 }
295
296 // libgen's dirname and basename aren't guaranteed to be thread-safe and aren't
297 // guaranteed to not modify their input strings, and in fact are implemented
298 // differently in this regard on different platforms. Don't use them, but
299 // adhere to their behavior.
DirName() const300 FilePath FilePath::DirName() const {
301 FilePath new_path(path_);
302 new_path.StripTrailingSeparatorsInternal();
303
304 // The drive letter, if any, always needs to remain in the output. If there
305 // is no drive letter, as will always be the case on platforms which do not
306 // support drive letters, letter will be npos, or -1, so the comparisons and
307 // resizes below using letter will still be valid.
308 StringType::size_type letter = FindDriveLetter(new_path.path_);
309
310 StringType::size_type last_separator = new_path.path_.find_last_of(
311 kSeparators, StringType::npos, kSeparatorsLength - 1);
312 if (last_separator == StringType::npos) {
313 // path_ is in the current directory.
314 new_path.path_.resize(letter + 1);
315 } else if (last_separator == letter + 1) {
316 // path_ is in the root directory.
317 new_path.path_.resize(letter + 2);
318 } else if (last_separator == letter + 2 &&
319 IsSeparator(new_path.path_[letter + 1])) {
320 // path_ is in "//" (possibly with a drive letter); leave the double
321 // separator intact indicating alternate root.
322 new_path.path_.resize(letter + 3);
323 } else if (last_separator != 0) {
324 // path_ is somewhere else, trim the basename.
325 new_path.path_.resize(last_separator);
326 }
327
328 new_path.StripTrailingSeparatorsInternal();
329 if (!new_path.path_.length())
330 new_path.path_ = kCurrentDirectory;
331
332 return new_path;
333 }
334
BaseName() const335 FilePath FilePath::BaseName() const {
336 FilePath new_path(path_);
337 new_path.StripTrailingSeparatorsInternal();
338
339 // The drive letter, if any, is always stripped.
340 StringType::size_type letter = FindDriveLetter(new_path.path_);
341 if (letter != StringType::npos) {
342 new_path.path_.erase(0, letter + 1);
343 }
344
345 // Keep everything after the final separator, but if the pathname is only
346 // one character and it's a separator, leave it alone.
347 StringType::size_type last_separator = new_path.path_.find_last_of(
348 kSeparators, StringType::npos, kSeparatorsLength - 1);
349 if (last_separator != StringType::npos &&
350 last_separator < new_path.path_.length() - 1) {
351 new_path.path_.erase(0, last_separator + 1);
352 }
353
354 return new_path;
355 }
356
Extension() const357 StringType FilePath::Extension() const {
358 FilePath base(BaseName());
359 const StringType::size_type dot = ExtensionSeparatorPosition(base.path_);
360 if (dot == StringType::npos)
361 return StringType();
362
363 return base.path_.substr(dot, StringType::npos);
364 }
365
FinalExtension() const366 StringType FilePath::FinalExtension() const {
367 FilePath base(BaseName());
368 const StringType::size_type dot = FinalExtensionSeparatorPosition(base.path_);
369 if (dot == StringType::npos)
370 return StringType();
371
372 return base.path_.substr(dot, StringType::npos);
373 }
374
RemoveExtension() const375 FilePath FilePath::RemoveExtension() const {
376 if (Extension().empty())
377 return *this;
378
379 const StringType::size_type dot = ExtensionSeparatorPosition(path_);
380 if (dot == StringType::npos)
381 return *this;
382
383 return FilePath(path_.substr(0, dot));
384 }
385
RemoveFinalExtension() const386 FilePath FilePath::RemoveFinalExtension() const {
387 if (FinalExtension().empty())
388 return *this;
389
390 const StringType::size_type dot = FinalExtensionSeparatorPosition(path_);
391 if (dot == StringType::npos)
392 return *this;
393
394 return FilePath(path_.substr(0, dot));
395 }
396
InsertBeforeExtension(StringViewType suffix) const397 FilePath FilePath::InsertBeforeExtension(StringViewType suffix) const {
398 if (suffix.empty())
399 return FilePath(path_);
400
401 if (IsEmptyOrSpecialCase(BaseName().value()))
402 return FilePath();
403
404 StringType ext = Extension();
405 StringType ret = RemoveExtension().value();
406 ret.append(suffix);
407 ret.append(ext);
408 return FilePath(ret);
409 }
410
// Narrow-string variant of InsertBeforeExtension(). DCHECKs that |suffix| is
// ASCII, then forwards it: converted with ASCIIToUTF16() on Windows, passed
// through unchanged on POSIX and Fuchsia.
FilePath FilePath::InsertBeforeExtensionASCII(std::string_view suffix) const {
  DCHECK(IsStringASCII(suffix));
#if defined(OS_WIN)
  return InsertBeforeExtension(ASCIIToUTF16(suffix));
#elif defined(OS_POSIX) || defined(OS_FUCHSIA)
  return InsertBeforeExtension(suffix);
#endif
}
419
AddExtension(StringViewType extension) const420 FilePath FilePath::AddExtension(StringViewType extension) const {
421 if (IsEmptyOrSpecialCase(BaseName().value()))
422 return FilePath();
423
424 // If the new extension is "" or ".", then just return the current FilePath.
425 if (extension.empty() ||
426 (extension.size() == 1 && extension[0] == kExtensionSeparator))
427 return *this;
428
429 StringType str = path_;
430 if (extension[0] != kExtensionSeparator &&
431 *(str.end() - 1) != kExtensionSeparator) {
432 str.append(1, kExtensionSeparator);
433 }
434 str.append(extension);
435 return FilePath(str);
436 }
437
ReplaceExtension(StringViewType extension) const438 FilePath FilePath::ReplaceExtension(StringViewType extension) const {
439 if (IsEmptyOrSpecialCase(BaseName().value()))
440 return FilePath();
441
442 FilePath no_ext = RemoveExtension();
443 // If the new extension is "" or ".", then just remove the current extension.
444 if (extension.empty() ||
445 (extension.size() == 1 && extension[0] == kExtensionSeparator))
446 return no_ext;
447
448 StringType str = no_ext.value();
449 if (extension[0] != kExtensionSeparator)
450 str.append(1, kExtensionSeparator);
451 str.append(extension);
452 return FilePath(str);
453 }
454
Append(StringViewType component) const455 FilePath FilePath::Append(StringViewType component) const {
456 StringViewType appended = component;
457 StringType without_nuls;
458
459 StringType::size_type nul_pos = component.find(kStringTerminator);
460 if (nul_pos != StringViewType::npos) {
461 without_nuls.assign(component.substr(0, nul_pos));
462 appended = StringViewType(without_nuls);
463 }
464
465 DCHECK(!IsPathAbsolute(appended));
466
467 if (path_.compare(kCurrentDirectory) == 0 && !appended.empty()) {
468 // Append normally doesn't do any normalization, but as a special case,
469 // when appending to kCurrentDirectory, just return a new path for the
470 // component argument. Appending component to kCurrentDirectory would
471 // serve no purpose other than needlessly lengthening the path, and
472 // it's likely in practice to wind up with FilePath objects containing
473 // only kCurrentDirectory when calling DirName on a single relative path
474 // component.
475 return FilePath(appended);
476 }
477
478 FilePath new_path(path_);
479 new_path.StripTrailingSeparatorsInternal();
480
481 // Don't append a separator if the path is empty (indicating the current
482 // directory) or if the path component is empty (indicating nothing to
483 // append).
484 if (!appended.empty() && !new_path.path_.empty()) {
485 // Don't append a separator if the path still ends with a trailing
486 // separator after stripping (indicating the root directory).
487 if (!IsSeparator(new_path.path_.back())) {
488 // Don't append a separator if the path is just a drive letter.
489 if (FindDriveLetter(new_path.path_) + 1 != new_path.path_.length()) {
490 new_path.path_.append(1, kSeparators[0]);
491 }
492 }
493 }
494
495 new_path.path_.append(appended);
496 return new_path;
497 }
498
// Overload of Append() taking a FilePath; forwards |component|'s string
// value to the StringViewType overload.
FilePath FilePath::Append(const FilePath& component) const {
  return Append(component.value());
}
502
// Narrow-string variant of Append(). DCHECKs that |component| is ASCII, then
// forwards it: converted with ASCIIToUTF16() on Windows, passed through
// unchanged on POSIX and Fuchsia.
FilePath FilePath::AppendASCII(std::string_view component) const {
  DCHECK(base::IsStringASCII(component));
#if defined(OS_WIN)
  return Append(ASCIIToUTF16(component));
#elif defined(OS_POSIX) || defined(OS_FUCHSIA)
  return Append(component);
#endif
}
511
// Returns true if this path is absolute, as determined by IsPathAbsolute().
bool FilePath::IsAbsolute() const {
  return IsPathAbsolute(path_);
}
515
EndsWithSeparator() const516 bool FilePath::EndsWithSeparator() const {
517 if (empty())
518 return false;
519 return IsSeparator(path_.back());
520 }
521
AsEndingWithSeparator() const522 FilePath FilePath::AsEndingWithSeparator() const {
523 if (EndsWithSeparator() || path_.empty())
524 return *this;
525
526 StringType path_str;
527 path_str.reserve(path_.length() + 1); // Only allocate string once.
528
529 path_str = path_;
530 path_str.append(&kSeparators[0], 1);
531 return FilePath(path_str);
532 }
533
StripTrailingSeparators() const534 FilePath FilePath::StripTrailingSeparators() const {
535 FilePath new_path(path_);
536 new_path.StripTrailingSeparatorsInternal();
537
538 return new_path;
539 }
540
ReferencesParent() const541 bool FilePath::ReferencesParent() const {
542 if (path_.find(kParentDirectory) == StringType::npos) {
543 // GetComponents is quite expensive, so avoid calling it in the majority
544 // of cases where there isn't a kParentDirectory anywhere in the path.
545 return false;
546 }
547
548 std::vector<StringType> components;
549 GetComponents(&components);
550
551 std::vector<StringType>::const_iterator it = components.begin();
552 for (; it != components.end(); ++it) {
553 const StringType& component = *it;
554 // Windows has odd, undocumented behavior with path components containing
555 // only whitespace and . characters. So, if all we see is . and
556 // whitespace, then we treat any .. sequence as referencing parent.
557 // For simplicity we enforce this on all platforms.
558 if (component.find_first_not_of(FILE_PATH_LITERAL(". \n\r\t")) ==
559 std::string::npos &&
560 component.find(kParentDirectory) != std::string::npos) {
561 return true;
562 }
563 }
564 return false;
565 }
566
567 #if defined(OS_WIN)
568
// Windows: returns a copy of the stored path string as a std::u16string.
std::u16string FilePath::LossyDisplayName() const {
  return path_;
}
572
MaybeAsASCII() const573 std::string FilePath::MaybeAsASCII() const {
574 if (base::IsStringASCII(path_))
575 return UTF16ToASCII(path_);
576 return std::string();
577 }
578
// Windows: returns the path converted with UTF16ToUTF8().
std::string FilePath::As8Bit() const {
  return UTF16ToUTF8(value());
}
582
583 #elif defined(OS_POSIX) || defined(OS_FUCHSIA)
584
585 // See file_path.h for a discussion of the encoding of paths on POSIX
586 // platforms. These encoding conversion functions are not quite correct.
587
MaybeAsASCII() const588 std::string FilePath::MaybeAsASCII() const {
589 if (base::IsStringASCII(path_))
590 return path_;
591 return std::string();
592 }
593
// POSIX/Fuchsia: returns the stored path string as is, with no conversion.
std::string FilePath::As8Bit() const {
  return value();
}
597
598 #endif // defined(OS_WIN)
599
StripTrailingSeparatorsInternal()600 void FilePath::StripTrailingSeparatorsInternal() {
601 // If there is no drive letter, start will be 1, which will prevent stripping
602 // the leading separator if there is only one separator. If there is a drive
603 // letter, start will be set appropriately to prevent stripping the first
604 // separator following the drive letter, if a separator immediately follows
605 // the drive letter.
606 StringType::size_type start = FindDriveLetter(path_) + 2;
607
608 StringType::size_type last_stripped = StringType::npos;
609 for (StringType::size_type pos = path_.length();
610 pos > start && IsSeparator(path_[pos - 1]); --pos) {
611 // If the string only has two separators and they're at the beginning,
612 // don't strip them, unless the string began with more than two separators.
613 if (pos != start + 1 || last_stripped == start + 2 ||
614 !IsSeparator(path_[start - 1])) {
615 path_.resize(pos - 1);
616 last_stripped = pos;
617 }
618 }
619 }
620
// Returns NormalizePathSeparatorsTo() applied with kSeparators[0] as the
// target separator.
FilePath FilePath::NormalizePathSeparators() const {
  return NormalizePathSeparatorsTo(kSeparators[0]);
}
624
NormalizePathSeparatorsTo(CharType separator) const625 FilePath FilePath::NormalizePathSeparatorsTo(CharType separator) const {
626 #if defined(FILE_PATH_USES_WIN_SEPARATORS)
627 DCHECK_NE(static_cast<const void*>(kSeparators + kSeparatorsLength),
628 static_cast<const void*>(std::find(
629 kSeparators, kSeparators + kSeparatorsLength, separator)));
630 StringType copy = path_;
631 for (size_t i = 0; i < kSeparatorsLength; ++i) {
632 std::replace(copy.begin(), copy.end(), kSeparators[i], separator);
633 }
634 return FilePath(copy);
635 #else
636 return *this;
637 #endif
638 }
639
640 } // namespace base
641