xref: /third_party/gn/src/base/files/file_path.cc (revision 6d528ed9)
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/files/file_path.h"
6
7#include <string.h>
8
9#include <algorithm>
10#include <iterator>
11#include <string>
12#include <string_view>
13
14#include "base/logging.h"
15#include "base/strings/string_util.h"
16#include "base/strings/utf_string_conversions.h"
17#include "util/build_config.h"
18
19#if defined(OS_MACOSX)
20#include "base/mac/scoped_cftyperef.h"
21#include "base/third_party/icu/icu_utf.h"
22#endif
23
24#if defined(OS_WIN)
25#include <windows.h>
26#elif defined(OS_MACOSX)
27#include <CoreFoundation/CoreFoundation.h>
28#endif
29
30namespace base {
31
32using StringType = FilePath::StringType;
33using StringViewType = FilePath::StringViewType;
34
35namespace {
36
37const char* const kCommonDoubleExtensionSuffixes[] = {"gz", "z", "bz2", "bz"};
38const char* const kCommonDoubleExtensions[] = {"user.js"};
39
40const FilePath::CharType kStringTerminator = FILE_PATH_LITERAL('\0');
41
42// If this FilePath contains a drive letter specification, returns the
43// position of the last character of the drive letter specification,
44// otherwise returns npos.  This can only be true on Windows, when a pathname
45// begins with a letter followed by a colon.  On other platforms, this always
46// returns npos.
47StringViewType::size_type FindDriveLetter(StringViewType path) {
48#if defined(FILE_PATH_USES_DRIVE_LETTERS)
49  // This is dependent on an ASCII-based character set, but that's a
50  // reasonable assumption.  iswalpha can be too inclusive here.
51  if (path.length() >= 2 && path[1] == L':' &&
52      ((path[0] >= L'A' && path[0] <= L'Z') ||
53       (path[0] >= L'a' && path[0] <= L'z'))) {
54    return 1;
55  }
56#endif  // FILE_PATH_USES_DRIVE_LETTERS
57  return StringType::npos;
58}
59
60#if defined(FILE_PATH_USES_DRIVE_LETTERS)
61bool EqualDriveLetterCaseInsensitive(StringViewType a, StringViewType b) {
62  size_t a_letter_pos = FindDriveLetter(a);
63  size_t b_letter_pos = FindDriveLetter(b);
64
65  if (a_letter_pos == StringType::npos || b_letter_pos == StringType::npos)
66    return a == b;
67
68  StringViewType a_letter(a.substr(0, a_letter_pos + 1));
69  StringViewType b_letter(b.substr(0, b_letter_pos + 1));
70  if (!StartsWithCaseInsensitiveASCII(a_letter, b_letter))
71    return false;
72
73  StringViewType a_rest(a.substr(a_letter_pos + 1));
74  StringViewType b_rest(b.substr(b_letter_pos + 1));
75  return a_rest == b_rest;
76}
77#endif  // defined(FILE_PATH_USES_DRIVE_LETTERS)
78
79bool IsPathAbsolute(StringViewType path) {
80#if defined(FILE_PATH_USES_DRIVE_LETTERS)
81  StringType::size_type letter = FindDriveLetter(path);
82  if (letter != StringType::npos) {
83    // Look for a separator right after the drive specification.
84    return path.length() > letter + 1 &&
85           FilePath::IsSeparator(path[letter + 1]);
86  }
87  // Look for a pair of leading separators.
88  return path.length() > 1 && FilePath::IsSeparator(path[0]) &&
89         FilePath::IsSeparator(path[1]);
90#else   // FILE_PATH_USES_DRIVE_LETTERS
91  // Look for a separator in the first position.
92  return path.length() > 0 && FilePath::IsSeparator(path[0]);
93#endif  // FILE_PATH_USES_DRIVE_LETTERS
94}
95
96bool AreAllSeparators(const StringType& input) {
97  for (StringType::const_iterator it = input.begin(); it != input.end(); ++it) {
98    if (!FilePath::IsSeparator(*it))
99      return false;
100  }
101
102  return true;
103}
104
105// Find the position of the '.' that separates the extension from the rest
106// of the file name. The position is relative to BaseName(), not value().
107// Returns npos if it can't find an extension.
108StringType::size_type FinalExtensionSeparatorPosition(const StringType& path) {
109  // Special case "." and ".."
110  if (path == FilePath::kCurrentDirectory || path == FilePath::kParentDirectory)
111    return StringType::npos;
112
113  return path.rfind(FilePath::kExtensionSeparator);
114}
115
116// Same as above, but allow a second extension component of up to 4
117// characters when the rightmost extension component is a common double
118// extension (gz, bz2, Z).  For example, foo.tar.gz or foo.tar.Z would have
119// extension components of '.tar.gz' and '.tar.Z' respectively.
120StringType::size_type ExtensionSeparatorPosition(const StringType& path) {
121  const StringType::size_type last_dot = FinalExtensionSeparatorPosition(path);
122
123  // No extension, or the extension is the whole filename.
124  if (last_dot == StringType::npos || last_dot == 0U)
125    return last_dot;
126
127  const StringType::size_type penultimate_dot =
128      path.rfind(FilePath::kExtensionSeparator, last_dot - 1);
129  const StringType::size_type last_separator = path.find_last_of(
130      FilePath::kSeparators, last_dot - 1, FilePath::kSeparatorsLength - 1);
131
132  if (penultimate_dot == StringType::npos ||
133      (last_separator != StringType::npos &&
134       penultimate_dot < last_separator)) {
135    return last_dot;
136  }
137
138  for (size_t i = 0; i < std::size(kCommonDoubleExtensions); ++i) {
139    StringType extension(path, penultimate_dot + 1);
140    if (LowerCaseEqualsASCII(extension, kCommonDoubleExtensions[i]))
141      return penultimate_dot;
142  }
143
144  StringType extension(path, last_dot + 1);
145  for (size_t i = 0; i < std::size(kCommonDoubleExtensionSuffixes); ++i) {
146    if (LowerCaseEqualsASCII(extension, kCommonDoubleExtensionSuffixes[i])) {
147      if ((last_dot - penultimate_dot) <= 5U &&
148          (last_dot - penultimate_dot) > 1U) {
149        return penultimate_dot;
150      }
151    }
152  }
153
154  return last_dot;
155}
156
157// Returns true if path is "", ".", or "..".
158bool IsEmptyOrSpecialCase(const StringType& path) {
159  // Special cases "", ".", and ".."
160  if (path.empty() || path == FilePath::kCurrentDirectory ||
161      path == FilePath::kParentDirectory) {
162    return true;
163  }
164
165  return false;
166}
167
168}  // namespace
169
170FilePath::FilePath() = default;
171
172FilePath::FilePath(const FilePath& that) = default;
173FilePath::FilePath(FilePath&& that) noexcept = default;
174
175FilePath::FilePath(StringViewType path) {
176  path_.assign(path);
177  StringType::size_type nul_pos = path_.find(kStringTerminator);
178  if (nul_pos != StringType::npos)
179    path_.erase(nul_pos, StringType::npos);
180}
181
182FilePath::~FilePath() = default;
183
184FilePath& FilePath::operator=(const FilePath& that) = default;
185
186FilePath& FilePath::operator=(FilePath&& that) = default;
187
188bool FilePath::operator==(const FilePath& that) const {
189#if defined(FILE_PATH_USES_DRIVE_LETTERS)
190  return EqualDriveLetterCaseInsensitive(this->path_, that.path_);
191#else   // defined(FILE_PATH_USES_DRIVE_LETTERS)
192  return path_ == that.path_;
193#endif  // defined(FILE_PATH_USES_DRIVE_LETTERS)
194}
195
196bool FilePath::operator!=(const FilePath& that) const {
197#if defined(FILE_PATH_USES_DRIVE_LETTERS)
198  return !EqualDriveLetterCaseInsensitive(this->path_, that.path_);
199#else   // defined(FILE_PATH_USES_DRIVE_LETTERS)
200  return path_ != that.path_;
201#endif  // defined(FILE_PATH_USES_DRIVE_LETTERS)
202}
203
204// static
205bool FilePath::IsSeparator(CharType character) {
206  for (size_t i = 0; i < kSeparatorsLength - 1; ++i) {
207    if (character == kSeparators[i]) {
208      return true;
209    }
210  }
211
212  return false;
213}
214
215void FilePath::GetComponents(std::vector<StringType>* components) const {
216  DCHECK(components);
217  if (!components)
218    return;
219  components->clear();
220  if (value().empty())
221    return;
222
223  std::vector<StringType> ret_val;
224  FilePath current = *this;
225  FilePath base;
226
227  // Capture path components.
228  while (current != current.DirName()) {
229    base = current.BaseName();
230    if (!AreAllSeparators(base.value()))
231      ret_val.push_back(base.value());
232    current = current.DirName();
233  }
234
235  // Capture root, if any.
236  base = current.BaseName();
237  if (!base.value().empty() && base.value() != kCurrentDirectory)
238    ret_val.push_back(current.BaseName().value());
239
240  // Capture drive letter, if any.
241  FilePath dir = current.DirName();
242  StringType::size_type letter = FindDriveLetter(dir.value());
243  if (letter != StringType::npos) {
244    ret_val.push_back(StringType(dir.value(), 0, letter + 1));
245  }
246
247  *components = std::vector<StringType>(ret_val.rbegin(), ret_val.rend());
248}
249
250bool FilePath::IsParent(const FilePath& child) const {
251  return AppendRelativePath(child, nullptr);
252}
253
254bool FilePath::AppendRelativePath(const FilePath& child, FilePath* path) const {
255  std::vector<StringType> parent_components;
256  std::vector<StringType> child_components;
257  GetComponents(&parent_components);
258  child.GetComponents(&child_components);
259
260  if (parent_components.empty() ||
261      parent_components.size() >= child_components.size())
262    return false;
263
264  std::vector<StringType>::const_iterator parent_comp =
265      parent_components.begin();
266  std::vector<StringType>::const_iterator child_comp = child_components.begin();
267
268#if defined(FILE_PATH_USES_DRIVE_LETTERS)
269  // Windows can access case sensitive filesystems, so component
270  // comparisons must be case sensitive, but drive letters are
271  // never case sensitive.
272  if ((FindDriveLetter(*parent_comp) != StringType::npos) &&
273      (FindDriveLetter(*child_comp) != StringType::npos)) {
274    if (!StartsWithCaseInsensitiveASCII(*parent_comp, *child_comp))
275      return false;
276    ++parent_comp;
277    ++child_comp;
278  }
279#endif  // defined(FILE_PATH_USES_DRIVE_LETTERS)
280
281  while (parent_comp != parent_components.end()) {
282    if (*parent_comp != *child_comp)
283      return false;
284    ++parent_comp;
285    ++child_comp;
286  }
287
288  if (path != nullptr) {
289    for (; child_comp != child_components.end(); ++child_comp) {
290      *path = path->Append(*child_comp);
291    }
292  }
293  return true;
294}
295
296// libgen's dirname and basename aren't guaranteed to be thread-safe and aren't
297// guaranteed to not modify their input strings, and in fact are implemented
298// differently in this regard on different platforms.  Don't use them, but
299// adhere to their behavior.
300FilePath FilePath::DirName() const {
301  FilePath new_path(path_);
302  new_path.StripTrailingSeparatorsInternal();
303
304  // The drive letter, if any, always needs to remain in the output.  If there
305  // is no drive letter, as will always be the case on platforms which do not
306  // support drive letters, letter will be npos, or -1, so the comparisons and
307  // resizes below using letter will still be valid.
308  StringType::size_type letter = FindDriveLetter(new_path.path_);
309
310  StringType::size_type last_separator = new_path.path_.find_last_of(
311      kSeparators, StringType::npos, kSeparatorsLength - 1);
312  if (last_separator == StringType::npos) {
313    // path_ is in the current directory.
314    new_path.path_.resize(letter + 1);
315  } else if (last_separator == letter + 1) {
316    // path_ is in the root directory.
317    new_path.path_.resize(letter + 2);
318  } else if (last_separator == letter + 2 &&
319             IsSeparator(new_path.path_[letter + 1])) {
320    // path_ is in "//" (possibly with a drive letter); leave the double
321    // separator intact indicating alternate root.
322    new_path.path_.resize(letter + 3);
323  } else if (last_separator != 0) {
324    // path_ is somewhere else, trim the basename.
325    new_path.path_.resize(last_separator);
326  }
327
328  new_path.StripTrailingSeparatorsInternal();
329  if (!new_path.path_.length())
330    new_path.path_ = kCurrentDirectory;
331
332  return new_path;
333}
334
335FilePath FilePath::BaseName() const {
336  FilePath new_path(path_);
337  new_path.StripTrailingSeparatorsInternal();
338
339  // The drive letter, if any, is always stripped.
340  StringType::size_type letter = FindDriveLetter(new_path.path_);
341  if (letter != StringType::npos) {
342    new_path.path_.erase(0, letter + 1);
343  }
344
345  // Keep everything after the final separator, but if the pathname is only
346  // one character and it's a separator, leave it alone.
347  StringType::size_type last_separator = new_path.path_.find_last_of(
348      kSeparators, StringType::npos, kSeparatorsLength - 1);
349  if (last_separator != StringType::npos &&
350      last_separator < new_path.path_.length() - 1) {
351    new_path.path_.erase(0, last_separator + 1);
352  }
353
354  return new_path;
355}
356
357StringType FilePath::Extension() const {
358  FilePath base(BaseName());
359  const StringType::size_type dot = ExtensionSeparatorPosition(base.path_);
360  if (dot == StringType::npos)
361    return StringType();
362
363  return base.path_.substr(dot, StringType::npos);
364}
365
366StringType FilePath::FinalExtension() const {
367  FilePath base(BaseName());
368  const StringType::size_type dot = FinalExtensionSeparatorPosition(base.path_);
369  if (dot == StringType::npos)
370    return StringType();
371
372  return base.path_.substr(dot, StringType::npos);
373}
374
375FilePath FilePath::RemoveExtension() const {
376  if (Extension().empty())
377    return *this;
378
379  const StringType::size_type dot = ExtensionSeparatorPosition(path_);
380  if (dot == StringType::npos)
381    return *this;
382
383  return FilePath(path_.substr(0, dot));
384}
385
386FilePath FilePath::RemoveFinalExtension() const {
387  if (FinalExtension().empty())
388    return *this;
389
390  const StringType::size_type dot = FinalExtensionSeparatorPosition(path_);
391  if (dot == StringType::npos)
392    return *this;
393
394  return FilePath(path_.substr(0, dot));
395}
396
397FilePath FilePath::InsertBeforeExtension(StringViewType suffix) const {
398  if (suffix.empty())
399    return FilePath(path_);
400
401  if (IsEmptyOrSpecialCase(BaseName().value()))
402    return FilePath();
403
404  StringType ext = Extension();
405  StringType ret = RemoveExtension().value();
406  ret.append(suffix);
407  ret.append(ext);
408  return FilePath(ret);
409}
410
411FilePath FilePath::InsertBeforeExtensionASCII(std::string_view suffix) const {
412  DCHECK(IsStringASCII(suffix));
413#if defined(OS_WIN)
414  return InsertBeforeExtension(ASCIIToUTF16(suffix));
415#elif defined(OS_POSIX) || defined(OS_FUCHSIA)
416  return InsertBeforeExtension(suffix);
417#endif
418}
419
420FilePath FilePath::AddExtension(StringViewType extension) const {
421  if (IsEmptyOrSpecialCase(BaseName().value()))
422    return FilePath();
423
424  // If the new extension is "" or ".", then just return the current FilePath.
425  if (extension.empty() ||
426      (extension.size() == 1 && extension[0] == kExtensionSeparator))
427    return *this;
428
429  StringType str = path_;
430  if (extension[0] != kExtensionSeparator &&
431      *(str.end() - 1) != kExtensionSeparator) {
432    str.append(1, kExtensionSeparator);
433  }
434  str.append(extension);
435  return FilePath(str);
436}
437
438FilePath FilePath::ReplaceExtension(StringViewType extension) const {
439  if (IsEmptyOrSpecialCase(BaseName().value()))
440    return FilePath();
441
442  FilePath no_ext = RemoveExtension();
443  // If the new extension is "" or ".", then just remove the current extension.
444  if (extension.empty() ||
445      (extension.size() == 1 && extension[0] == kExtensionSeparator))
446    return no_ext;
447
448  StringType str = no_ext.value();
449  if (extension[0] != kExtensionSeparator)
450    str.append(1, kExtensionSeparator);
451  str.append(extension);
452  return FilePath(str);
453}
454
455FilePath FilePath::Append(StringViewType component) const {
456  StringViewType appended = component;
457  StringType without_nuls;
458
459  StringType::size_type nul_pos = component.find(kStringTerminator);
460  if (nul_pos != StringViewType::npos) {
461    without_nuls.assign(component.substr(0, nul_pos));
462    appended = StringViewType(without_nuls);
463  }
464
465  DCHECK(!IsPathAbsolute(appended));
466
467  if (path_.compare(kCurrentDirectory) == 0 && !appended.empty()) {
468    // Append normally doesn't do any normalization, but as a special case,
469    // when appending to kCurrentDirectory, just return a new path for the
470    // component argument.  Appending component to kCurrentDirectory would
471    // serve no purpose other than needlessly lengthening the path, and
472    // it's likely in practice to wind up with FilePath objects containing
473    // only kCurrentDirectory when calling DirName on a single relative path
474    // component.
475    return FilePath(appended);
476  }
477
478  FilePath new_path(path_);
479  new_path.StripTrailingSeparatorsInternal();
480
481  // Don't append a separator if the path is empty (indicating the current
482  // directory) or if the path component is empty (indicating nothing to
483  // append).
484  if (!appended.empty() && !new_path.path_.empty()) {
485    // Don't append a separator if the path still ends with a trailing
486    // separator after stripping (indicating the root directory).
487    if (!IsSeparator(new_path.path_.back())) {
488      // Don't append a separator if the path is just a drive letter.
489      if (FindDriveLetter(new_path.path_) + 1 != new_path.path_.length()) {
490        new_path.path_.append(1, kSeparators[0]);
491      }
492    }
493  }
494
495  new_path.path_.append(appended);
496  return new_path;
497}
498
499FilePath FilePath::Append(const FilePath& component) const {
500  return Append(component.value());
501}
502
503FilePath FilePath::AppendASCII(std::string_view component) const {
504  DCHECK(base::IsStringASCII(component));
505#if defined(OS_WIN)
506  return Append(ASCIIToUTF16(component));
507#elif defined(OS_POSIX) || defined(OS_FUCHSIA)
508  return Append(component);
509#endif
510}
511
512bool FilePath::IsAbsolute() const {
513  return IsPathAbsolute(path_);
514}
515
516bool FilePath::EndsWithSeparator() const {
517  if (empty())
518    return false;
519  return IsSeparator(path_.back());
520}
521
522FilePath FilePath::AsEndingWithSeparator() const {
523  if (EndsWithSeparator() || path_.empty())
524    return *this;
525
526  StringType path_str;
527  path_str.reserve(path_.length() + 1);  // Only allocate string once.
528
529  path_str = path_;
530  path_str.append(&kSeparators[0], 1);
531  return FilePath(path_str);
532}
533
534FilePath FilePath::StripTrailingSeparators() const {
535  FilePath new_path(path_);
536  new_path.StripTrailingSeparatorsInternal();
537
538  return new_path;
539}
540
541bool FilePath::ReferencesParent() const {
542  if (path_.find(kParentDirectory) == StringType::npos) {
543    // GetComponents is quite expensive, so avoid calling it in the majority
544    // of cases where there isn't a kParentDirectory anywhere in the path.
545    return false;
546  }
547
548  std::vector<StringType> components;
549  GetComponents(&components);
550
551  std::vector<StringType>::const_iterator it = components.begin();
552  for (; it != components.end(); ++it) {
553    const StringType& component = *it;
554    // Windows has odd, undocumented behavior with path components containing
555    // only whitespace and . characters. So, if all we see is . and
556    // whitespace, then we treat any .. sequence as referencing parent.
557    // For simplicity we enforce this on all platforms.
558    if (component.find_first_not_of(FILE_PATH_LITERAL(". \n\r\t")) ==
559            std::string::npos &&
560        component.find(kParentDirectory) != std::string::npos) {
561      return true;
562    }
563  }
564  return false;
565}
566
567#if defined(OS_WIN)
568
569std::u16string FilePath::LossyDisplayName() const {
570  return path_;
571}
572
573std::string FilePath::MaybeAsASCII() const {
574  if (base::IsStringASCII(path_))
575    return UTF16ToASCII(path_);
576  return std::string();
577}
578
579std::string FilePath::As8Bit() const {
580  return UTF16ToUTF8(value());
581}
582
583#elif defined(OS_POSIX) || defined(OS_FUCHSIA)
584
585// See file_path.h for a discussion of the encoding of paths on POSIX
586// platforms.  These encoding conversion functions are not quite correct.
587
588std::string FilePath::MaybeAsASCII() const {
589  if (base::IsStringASCII(path_))
590    return path_;
591  return std::string();
592}
593
594std::string FilePath::As8Bit() const {
595  return value();
596}
597
598#endif  // defined(OS_WIN)
599
600void FilePath::StripTrailingSeparatorsInternal() {
601  // If there is no drive letter, start will be 1, which will prevent stripping
602  // the leading separator if there is only one separator.  If there is a drive
603  // letter, start will be set appropriately to prevent stripping the first
604  // separator following the drive letter, if a separator immediately follows
605  // the drive letter.
606  StringType::size_type start = FindDriveLetter(path_) + 2;
607
608  StringType::size_type last_stripped = StringType::npos;
609  for (StringType::size_type pos = path_.length();
610       pos > start && IsSeparator(path_[pos - 1]); --pos) {
611    // If the string only has two separators and they're at the beginning,
612    // don't strip them, unless the string began with more than two separators.
613    if (pos != start + 1 || last_stripped == start + 2 ||
614        !IsSeparator(path_[start - 1])) {
615      path_.resize(pos - 1);
616      last_stripped = pos;
617    }
618  }
619}
620
621FilePath FilePath::NormalizePathSeparators() const {
622  return NormalizePathSeparatorsTo(kSeparators[0]);
623}
624
625FilePath FilePath::NormalizePathSeparatorsTo(CharType separator) const {
626#if defined(FILE_PATH_USES_WIN_SEPARATORS)
627  DCHECK_NE(static_cast<const void*>(kSeparators + kSeparatorsLength),
628            static_cast<const void*>(std::find(
629                kSeparators, kSeparators + kSeparatorsLength, separator)));
630  StringType copy = path_;
631  for (size_t i = 0; i < kSeparatorsLength; ++i) {
632    std::replace(copy.begin(), copy.end(), kSeparators[i], separator);
633  }
634  return FilePath(copy);
635#else
636  return *this;
637#endif
638}
639
640}  // namespace base
641