1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/files/file_path.h" 6 7#include <string.h> 8 9#include <algorithm> 10#include <iterator> 11#include <string> 12#include <string_view> 13 14#include "base/logging.h" 15#include "base/strings/string_util.h" 16#include "base/strings/utf_string_conversions.h" 17#include "util/build_config.h" 18 19#if defined(OS_MACOSX) 20#include "base/mac/scoped_cftyperef.h" 21#include "base/third_party/icu/icu_utf.h" 22#endif 23 24#if defined(OS_WIN) 25#include <windows.h> 26#elif defined(OS_MACOSX) 27#include <CoreFoundation/CoreFoundation.h> 28#endif 29 30namespace base { 31 32using StringType = FilePath::StringType; 33using StringViewType = FilePath::StringViewType; 34 35namespace { 36 37const char* const kCommonDoubleExtensionSuffixes[] = {"gz", "z", "bz2", "bz"}; 38const char* const kCommonDoubleExtensions[] = {"user.js"}; 39 40const FilePath::CharType kStringTerminator = FILE_PATH_LITERAL('\0'); 41 42// If this FilePath contains a drive letter specification, returns the 43// position of the last character of the drive letter specification, 44// otherwise returns npos. This can only be true on Windows, when a pathname 45// begins with a letter followed by a colon. On other platforms, this always 46// returns npos. 47StringViewType::size_type FindDriveLetter(StringViewType path) { 48#if defined(FILE_PATH_USES_DRIVE_LETTERS) 49 // This is dependent on an ASCII-based character set, but that's a 50 // reasonable assumption. iswalpha can be too inclusive here. 51 if (path.length() >= 2 && path[1] == L':' && 52 ((path[0] >= L'A' && path[0] <= L'Z') || 53 (path[0] >= L'a' && path[0] <= L'z'))) { 54 return 1; 55 } 56#endif // FILE_PATH_USES_DRIVE_LETTERS 57 return StringType::npos; 58} 59 60#if defined(FILE_PATH_USES_DRIVE_LETTERS) 61bool EqualDriveLetterCaseInsensitive(StringViewType a, StringViewType b) { 62 size_t a_letter_pos = FindDriveLetter(a); 63 size_t b_letter_pos = FindDriveLetter(b); 64 65 if (a_letter_pos == StringType::npos || b_letter_pos == StringType::npos) 66 return a == b; 67 68 StringViewType a_letter(a.substr(0, a_letter_pos + 1)); 69 StringViewType b_letter(b.substr(0, b_letter_pos + 1)); 70 if (!StartsWithCaseInsensitiveASCII(a_letter, b_letter)) 71 return false; 72 73 StringViewType a_rest(a.substr(a_letter_pos + 1)); 74 StringViewType b_rest(b.substr(b_letter_pos + 1)); 75 return a_rest == b_rest; 76} 77#endif // defined(FILE_PATH_USES_DRIVE_LETTERS) 78 79bool IsPathAbsolute(StringViewType path) { 80#if defined(FILE_PATH_USES_DRIVE_LETTERS) 81 StringType::size_type letter = FindDriveLetter(path); 82 if (letter != StringType::npos) { 83 // Look for a separator right after the drive specification. 84 return path.length() > letter + 1 && 85 FilePath::IsSeparator(path[letter + 1]); 86 } 87 // Look for a pair of leading separators. 88 return path.length() > 1 && FilePath::IsSeparator(path[0]) && 89 FilePath::IsSeparator(path[1]); 90#else // FILE_PATH_USES_DRIVE_LETTERS 91 // Look for a separator in the first position. 92 return path.length() > 0 && FilePath::IsSeparator(path[0]); 93#endif // FILE_PATH_USES_DRIVE_LETTERS 94} 95 96bool AreAllSeparators(const StringType& input) { 97 for (StringType::const_iterator it = input.begin(); it != input.end(); ++it) { 98 if (!FilePath::IsSeparator(*it)) 99 return false; 100 } 101 102 return true; 103} 104 105// Find the position of the '.' that separates the extension from the rest 106// of the file name. The position is relative to BaseName(), not value(). 107// Returns npos if it can't find an extension. 108StringType::size_type FinalExtensionSeparatorPosition(const StringType& path) { 109 // Special case "." and ".." 110 if (path == FilePath::kCurrentDirectory || path == FilePath::kParentDirectory) 111 return StringType::npos; 112 113 return path.rfind(FilePath::kExtensionSeparator); 114} 115 116// Same as above, but allow a second extension component of up to 4 117// characters when the rightmost extension component is a common double 118// extension (gz, bz2, Z). For example, foo.tar.gz or foo.tar.Z would have 119// extension components of '.tar.gz' and '.tar.Z' respectively. 120StringType::size_type ExtensionSeparatorPosition(const StringType& path) { 121 const StringType::size_type last_dot = FinalExtensionSeparatorPosition(path); 122 123 // No extension, or the extension is the whole filename. 124 if (last_dot == StringType::npos || last_dot == 0U) 125 return last_dot; 126 127 const StringType::size_type penultimate_dot = 128 path.rfind(FilePath::kExtensionSeparator, last_dot - 1); 129 const StringType::size_type last_separator = path.find_last_of( 130 FilePath::kSeparators, last_dot - 1, FilePath::kSeparatorsLength - 1); 131 132 if (penultimate_dot == StringType::npos || 133 (last_separator != StringType::npos && 134 penultimate_dot < last_separator)) { 135 return last_dot; 136 } 137 138 for (size_t i = 0; i < std::size(kCommonDoubleExtensions); ++i) { 139 StringType extension(path, penultimate_dot + 1); 140 if (LowerCaseEqualsASCII(extension, kCommonDoubleExtensions[i])) 141 return penultimate_dot; 142 } 143 144 StringType extension(path, last_dot + 1); 145 for (size_t i = 0; i < std::size(kCommonDoubleExtensionSuffixes); ++i) { 146 if (LowerCaseEqualsASCII(extension, kCommonDoubleExtensionSuffixes[i])) { 147 if ((last_dot - penultimate_dot) <= 5U && 148 (last_dot - penultimate_dot) > 1U) { 149 return penultimate_dot; 150 } 151 } 152 } 153 154 return last_dot; 155} 156 157// Returns true if path is "", ".", or "..". 158bool IsEmptyOrSpecialCase(const StringType& path) { 159 // Special cases "", ".", and ".." 160 if (path.empty() || path == FilePath::kCurrentDirectory || 161 path == FilePath::kParentDirectory) { 162 return true; 163 } 164 165 return false; 166} 167 168} // namespace 169 170FilePath::FilePath() = default; 171 172FilePath::FilePath(const FilePath& that) = default; 173FilePath::FilePath(FilePath&& that) noexcept = default; 174 175FilePath::FilePath(StringViewType path) { 176 path_.assign(path); 177 StringType::size_type nul_pos = path_.find(kStringTerminator); 178 if (nul_pos != StringType::npos) 179 path_.erase(nul_pos, StringType::npos); 180} 181 182FilePath::~FilePath() = default; 183 184FilePath& FilePath::operator=(const FilePath& that) = default; 185 186FilePath& FilePath::operator=(FilePath&& that) = default; 187 188bool FilePath::operator==(const FilePath& that) const { 189#if defined(FILE_PATH_USES_DRIVE_LETTERS) 190 return EqualDriveLetterCaseInsensitive(this->path_, that.path_); 191#else // defined(FILE_PATH_USES_DRIVE_LETTERS) 192 return path_ == that.path_; 193#endif // defined(FILE_PATH_USES_DRIVE_LETTERS) 194} 195 196bool FilePath::operator!=(const FilePath& that) const { 197#if defined(FILE_PATH_USES_DRIVE_LETTERS) 198 return !EqualDriveLetterCaseInsensitive(this->path_, that.path_); 199#else // defined(FILE_PATH_USES_DRIVE_LETTERS) 200 return path_ != that.path_; 201#endif // defined(FILE_PATH_USES_DRIVE_LETTERS) 202} 203 204// static 205bool FilePath::IsSeparator(CharType character) { 206 for (size_t i = 0; i < kSeparatorsLength - 1; ++i) { 207 if (character == kSeparators[i]) { 208 return true; 209 } 210 } 211 212 return false; 213} 214 215void FilePath::GetComponents(std::vector<StringType>* components) const { 216 DCHECK(components); 217 if (!components) 218 return; 219 components->clear(); 220 if (value().empty()) 221 return; 222 223 std::vector<StringType> ret_val; 224 FilePath current = *this; 225 FilePath base; 226 227 // Capture path components. 228 while (current != current.DirName()) { 229 base = current.BaseName(); 230 if (!AreAllSeparators(base.value())) 231 ret_val.push_back(base.value()); 232 current = current.DirName(); 233 } 234 235 // Capture root, if any. 236 base = current.BaseName(); 237 if (!base.value().empty() && base.value() != kCurrentDirectory) 238 ret_val.push_back(current.BaseName().value()); 239 240 // Capture drive letter, if any. 241 FilePath dir = current.DirName(); 242 StringType::size_type letter = FindDriveLetter(dir.value()); 243 if (letter != StringType::npos) { 244 ret_val.push_back(StringType(dir.value(), 0, letter + 1)); 245 } 246 247 *components = std::vector<StringType>(ret_val.rbegin(), ret_val.rend()); 248} 249 250bool FilePath::IsParent(const FilePath& child) const { 251 return AppendRelativePath(child, nullptr); 252} 253 254bool FilePath::AppendRelativePath(const FilePath& child, FilePath* path) const { 255 std::vector<StringType> parent_components; 256 std::vector<StringType> child_components; 257 GetComponents(&parent_components); 258 child.GetComponents(&child_components); 259 260 if (parent_components.empty() || 261 parent_components.size() >= child_components.size()) 262 return false; 263 264 std::vector<StringType>::const_iterator parent_comp = 265 parent_components.begin(); 266 std::vector<StringType>::const_iterator child_comp = child_components.begin(); 267 268#if defined(FILE_PATH_USES_DRIVE_LETTERS) 269 // Windows can access case sensitive filesystems, so component 270 // comparisons must be case sensitive, but drive letters are 271 // never case sensitive. 272 if ((FindDriveLetter(*parent_comp) != StringType::npos) && 273 (FindDriveLetter(*child_comp) != StringType::npos)) { 274 if (!StartsWithCaseInsensitiveASCII(*parent_comp, *child_comp)) 275 return false; 276 ++parent_comp; 277 ++child_comp; 278 } 279#endif // defined(FILE_PATH_USES_DRIVE_LETTERS) 280 281 while (parent_comp != parent_components.end()) { 282 if (*parent_comp != *child_comp) 283 return false; 284 ++parent_comp; 285 ++child_comp; 286 } 287 288 if (path != nullptr) { 289 for (; child_comp != child_components.end(); ++child_comp) { 290 *path = path->Append(*child_comp); 291 } 292 } 293 return true; 294} 295 296// libgen's dirname and basename aren't guaranteed to be thread-safe and aren't 297// guaranteed to not modify their input strings, and in fact are implemented 298// differently in this regard on different platforms. Don't use them, but 299// adhere to their behavior. 300FilePath FilePath::DirName() const { 301 FilePath new_path(path_); 302 new_path.StripTrailingSeparatorsInternal(); 303 304 // The drive letter, if any, always needs to remain in the output. If there 305 // is no drive letter, as will always be the case on platforms which do not 306 // support drive letters, letter will be npos, or -1, so the comparisons and 307 // resizes below using letter will still be valid. 308 StringType::size_type letter = FindDriveLetter(new_path.path_); 309 310 StringType::size_type last_separator = new_path.path_.find_last_of( 311 kSeparators, StringType::npos, kSeparatorsLength - 1); 312 if (last_separator == StringType::npos) { 313 // path_ is in the current directory. 314 new_path.path_.resize(letter + 1); 315 } else if (last_separator == letter + 1) { 316 // path_ is in the root directory. 317 new_path.path_.resize(letter + 2); 318 } else if (last_separator == letter + 2 && 319 IsSeparator(new_path.path_[letter + 1])) { 320 // path_ is in "//" (possibly with a drive letter); leave the double 321 // separator intact indicating alternate root. 322 new_path.path_.resize(letter + 3); 323 } else if (last_separator != 0) { 324 // path_ is somewhere else, trim the basename. 325 new_path.path_.resize(last_separator); 326 } 327 328 new_path.StripTrailingSeparatorsInternal(); 329 if (!new_path.path_.length()) 330 new_path.path_ = kCurrentDirectory; 331 332 return new_path; 333} 334 335FilePath FilePath::BaseName() const { 336 FilePath new_path(path_); 337 new_path.StripTrailingSeparatorsInternal(); 338 339 // The drive letter, if any, is always stripped. 340 StringType::size_type letter = FindDriveLetter(new_path.path_); 341 if (letter != StringType::npos) { 342 new_path.path_.erase(0, letter + 1); 343 } 344 345 // Keep everything after the final separator, but if the pathname is only 346 // one character and it's a separator, leave it alone. 347 StringType::size_type last_separator = new_path.path_.find_last_of( 348 kSeparators, StringType::npos, kSeparatorsLength - 1); 349 if (last_separator != StringType::npos && 350 last_separator < new_path.path_.length() - 1) { 351 new_path.path_.erase(0, last_separator + 1); 352 } 353 354 return new_path; 355} 356 357StringType FilePath::Extension() const { 358 FilePath base(BaseName()); 359 const StringType::size_type dot = ExtensionSeparatorPosition(base.path_); 360 if (dot == StringType::npos) 361 return StringType(); 362 363 return base.path_.substr(dot, StringType::npos); 364} 365 366StringType FilePath::FinalExtension() const { 367 FilePath base(BaseName()); 368 const StringType::size_type dot = FinalExtensionSeparatorPosition(base.path_); 369 if (dot == StringType::npos) 370 return StringType(); 371 372 return base.path_.substr(dot, StringType::npos); 373} 374 375FilePath FilePath::RemoveExtension() const { 376 if (Extension().empty()) 377 return *this; 378 379 const StringType::size_type dot = ExtensionSeparatorPosition(path_); 380 if (dot == StringType::npos) 381 return *this; 382 383 return FilePath(path_.substr(0, dot)); 384} 385 386FilePath FilePath::RemoveFinalExtension() const { 387 if (FinalExtension().empty()) 388 return *this; 389 390 const StringType::size_type dot = FinalExtensionSeparatorPosition(path_); 391 if (dot == StringType::npos) 392 return *this; 393 394 return FilePath(path_.substr(0, dot)); 395} 396 397FilePath FilePath::InsertBeforeExtension(StringViewType suffix) const { 398 if (suffix.empty()) 399 return FilePath(path_); 400 401 if (IsEmptyOrSpecialCase(BaseName().value())) 402 return FilePath(); 403 404 StringType ext = Extension(); 405 StringType ret = RemoveExtension().value(); 406 ret.append(suffix); 407 ret.append(ext); 408 return FilePath(ret); 409} 410 411FilePath FilePath::InsertBeforeExtensionASCII(std::string_view suffix) const { 412 DCHECK(IsStringASCII(suffix)); 413#if defined(OS_WIN) 414 return InsertBeforeExtension(ASCIIToUTF16(suffix)); 415#elif defined(OS_POSIX) || defined(OS_FUCHSIA) 416 return InsertBeforeExtension(suffix); 417#endif 418} 419 420FilePath FilePath::AddExtension(StringViewType extension) const { 421 if (IsEmptyOrSpecialCase(BaseName().value())) 422 return FilePath(); 423 424 // If the new extension is "" or ".", then just return the current FilePath. 425 if (extension.empty() || 426 (extension.size() == 1 && extension[0] == kExtensionSeparator)) 427 return *this; 428 429 StringType str = path_; 430 if (extension[0] != kExtensionSeparator && 431 *(str.end() - 1) != kExtensionSeparator) { 432 str.append(1, kExtensionSeparator); 433 } 434 str.append(extension); 435 return FilePath(str); 436} 437 438FilePath FilePath::ReplaceExtension(StringViewType extension) const { 439 if (IsEmptyOrSpecialCase(BaseName().value())) 440 return FilePath(); 441 442 FilePath no_ext = RemoveExtension(); 443 // If the new extension is "" or ".", then just remove the current extension. 444 if (extension.empty() || 445 (extension.size() == 1 && extension[0] == kExtensionSeparator)) 446 return no_ext; 447 448 StringType str = no_ext.value(); 449 if (extension[0] != kExtensionSeparator) 450 str.append(1, kExtensionSeparator); 451 str.append(extension); 452 return FilePath(str); 453} 454 455FilePath FilePath::Append(StringViewType component) const { 456 StringViewType appended = component; 457 StringType without_nuls; 458 459 StringType::size_type nul_pos = component.find(kStringTerminator); 460 if (nul_pos != StringViewType::npos) { 461 without_nuls.assign(component.substr(0, nul_pos)); 462 appended = StringViewType(without_nuls); 463 } 464 465 DCHECK(!IsPathAbsolute(appended)); 466 467 if (path_.compare(kCurrentDirectory) == 0 && !appended.empty()) { 468 // Append normally doesn't do any normalization, but as a special case, 469 // when appending to kCurrentDirectory, just return a new path for the 470 // component argument. Appending component to kCurrentDirectory would 471 // serve no purpose other than needlessly lengthening the path, and 472 // it's likely in practice to wind up with FilePath objects containing 473 // only kCurrentDirectory when calling DirName on a single relative path 474 // component. 475 return FilePath(appended); 476 } 477 478 FilePath new_path(path_); 479 new_path.StripTrailingSeparatorsInternal(); 480 481 // Don't append a separator if the path is empty (indicating the current 482 // directory) or if the path component is empty (indicating nothing to 483 // append). 484 if (!appended.empty() && !new_path.path_.empty()) { 485 // Don't append a separator if the path still ends with a trailing 486 // separator after stripping (indicating the root directory). 487 if (!IsSeparator(new_path.path_.back())) { 488 // Don't append a separator if the path is just a drive letter. 489 if (FindDriveLetter(new_path.path_) + 1 != new_path.path_.length()) { 490 new_path.path_.append(1, kSeparators[0]); 491 } 492 } 493 } 494 495 new_path.path_.append(appended); 496 return new_path; 497} 498 499FilePath FilePath::Append(const FilePath& component) const { 500 return Append(component.value()); 501} 502 503FilePath FilePath::AppendASCII(std::string_view component) const { 504 DCHECK(base::IsStringASCII(component)); 505#if defined(OS_WIN) 506 return Append(ASCIIToUTF16(component)); 507#elif defined(OS_POSIX) || defined(OS_FUCHSIA) 508 return Append(component); 509#endif 510} 511 512bool FilePath::IsAbsolute() const { 513 return IsPathAbsolute(path_); 514} 515 516bool FilePath::EndsWithSeparator() const { 517 if (empty()) 518 return false; 519 return IsSeparator(path_.back()); 520} 521 522FilePath FilePath::AsEndingWithSeparator() const { 523 if (EndsWithSeparator() || path_.empty()) 524 return *this; 525 526 StringType path_str; 527 path_str.reserve(path_.length() + 1); // Only allocate string once. 528 529 path_str = path_; 530 path_str.append(&kSeparators[0], 1); 531 return FilePath(path_str); 532} 533 534FilePath FilePath::StripTrailingSeparators() const { 535 FilePath new_path(path_); 536 new_path.StripTrailingSeparatorsInternal(); 537 538 return new_path; 539} 540 541bool FilePath::ReferencesParent() const { 542 if (path_.find(kParentDirectory) == StringType::npos) { 543 // GetComponents is quite expensive, so avoid calling it in the majority 544 // of cases where there isn't a kParentDirectory anywhere in the path. 545 return false; 546 } 547 548 std::vector<StringType> components; 549 GetComponents(&components); 550 551 std::vector<StringType>::const_iterator it = components.begin(); 552 for (; it != components.end(); ++it) { 553 const StringType& component = *it; 554 // Windows has odd, undocumented behavior with path components containing 555 // only whitespace and . characters. So, if all we see is . and 556 // whitespace, then we treat any .. sequence as referencing parent. 557 // For simplicity we enforce this on all platforms. 558 if (component.find_first_not_of(FILE_PATH_LITERAL(". \n\r\t")) == 559 std::string::npos && 560 component.find(kParentDirectory) != std::string::npos) { 561 return true; 562 } 563 } 564 return false; 565} 566 567#if defined(OS_WIN) 568 569std::u16string FilePath::LossyDisplayName() const { 570 return path_; 571} 572 573std::string FilePath::MaybeAsASCII() const { 574 if (base::IsStringASCII(path_)) 575 return UTF16ToASCII(path_); 576 return std::string(); 577} 578 579std::string FilePath::As8Bit() const { 580 return UTF16ToUTF8(value()); 581} 582 583#elif defined(OS_POSIX) || defined(OS_FUCHSIA) 584 585// See file_path.h for a discussion of the encoding of paths on POSIX 586// platforms. These encoding conversion functions are not quite correct. 587 588std::string FilePath::MaybeAsASCII() const { 589 if (base::IsStringASCII(path_)) 590 return path_; 591 return std::string(); 592} 593 594std::string FilePath::As8Bit() const { 595 return value(); 596} 597 598#endif // defined(OS_WIN) 599 600void FilePath::StripTrailingSeparatorsInternal() { 601 // If there is no drive letter, start will be 1, which will prevent stripping 602 // the leading separator if there is only one separator. If there is a drive 603 // letter, start will be set appropriately to prevent stripping the first 604 // separator following the drive letter, if a separator immediately follows 605 // the drive letter. 606 StringType::size_type start = FindDriveLetter(path_) + 2; 607 608 StringType::size_type last_stripped = StringType::npos; 609 for (StringType::size_type pos = path_.length(); 610 pos > start && IsSeparator(path_[pos - 1]); --pos) { 611 // If the string only has two separators and they're at the beginning, 612 // don't strip them, unless the string began with more than two separators. 613 if (pos != start + 1 || last_stripped == start + 2 || 614 !IsSeparator(path_[start - 1])) { 615 path_.resize(pos - 1); 616 last_stripped = pos; 617 } 618 } 619} 620 621FilePath FilePath::NormalizePathSeparators() const { 622 return NormalizePathSeparatorsTo(kSeparators[0]); 623} 624 625FilePath FilePath::NormalizePathSeparatorsTo(CharType separator) const { 626#if defined(FILE_PATH_USES_WIN_SEPARATORS) 627 DCHECK_NE(static_cast<const void*>(kSeparators + kSeparatorsLength), 628 static_cast<const void*>(std::find( 629 kSeparators, kSeparators + kSeparatorsLength, separator))); 630 StringType copy = path_; 631 for (size_t i = 0; i < kSeparatorsLength; ++i) { 632 std::replace(copy.begin(), copy.end(), kSeparators[i], separator); 633 } 634 return FilePath(copy); 635#else 636 return *this; 637#endif 638} 639 640} // namespace base 641