#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0
#
# Treewide grep for references to files under Documentation, and report
# non-existing files in stderr.
#
# Usage: <script> [--help] [--fix] [--warn]
#
#   --fix   try to locate a replacement for every broken reference and
#           rewrite the files that mention it
#   --warn  report broken references as "Warning: ..." lines
#
# Must be run from the top of a git tree, as it relies on "git grep".

use warnings;
use strict;
use Getopt::Long qw(:config no_auto_abbrev);

# NOTE: only add things here when the file was gone, but the text wants
# to mention a past documentation file, for example, to give credits for
# the original work.
my %false_positives = (
	"Documentation/scsi/scsi_mid_low_api.rst" => "Documentation/Configure.help",
	"drivers/vhost/vhost.c" => "Documentation/virtual/lguest/lguest.c",
);

# Keep only the last directory component plus the file name of $0, so that
# it can be compared against the file names printed by "git grep" below.
my $scriptname = $0;
$scriptname =~ s,.*/([^/]+/),$1,;

# Run a command without going through the shell and return its output,
# one chomped line per list element. Returns an empty list (after a
# warning) if the command can't be started.
sub run_cmd
{
	my @cmd = @_;

	my $fh;
	if (!open($fh, '-|', @cmd)) {
		print STDERR "WARNING: failed to run $cmd[0]: $!\n";
		return ();
	}
	my @lines = <$fh>;
	close($fh);
	chomp(@lines);

	return @lines;
}

# Return the files under $dir whose name matches the (case-insensitive)
# find(1) pattern $pattern. The pattern is handed to find verbatim: no
# shell is involved, so it can't be glob-expanded behind our back.
sub find_files
{
	my ($dir, $pattern) = @_;

	return run_cmd('find', $dir, '-iname', $pattern);
}

# Replace every literal occurrence of $old with $new inside $file.
# The file is only rewritten when something actually changed.
sub replace_in_file
{
	my ($file, $old, $new) = @_;

	my $in;
	if (!open($in, '<', $file)) {
		print STDERR "WARNING: can't read $file: $!\n";
		return;
	}
	binmode($in);
	my $content = do { local $/; <$in> };
	close($in);

	return if (!defined($content));

	# \Q...\E: the reference is a path, not a regular expression
	return if (!($content =~ s/\Q$old\E/$new/g));

	my $out;
	if (!open($out, '>', $file)) {
		print STDERR "WARNING: can't write $file: $!\n";
		return;
	}
	binmode($out);
	print {$out} $content;
	close($out) or print STDERR "WARNING: error writing $file: $!\n";
}

# Parse arguments
my $help = 0;
my $fix = 0;
my $warn = 0;

# ".git" is a directory on a normal clone, but a regular file on linked
# worktrees and submodules, so only test for existence.
if (! -e ".git") {
	print "Warning: can't check if file exists, as this is not a git tree\n";
	exit 0;
}

GetOptions(
	'fix' => \$fix,
	'warn' => \$warn,
	'h|help|usage' => \$help,
);

if ($help != 0) {
	print "$scriptname [--help] [--fix] [--warn]\n";
	exit -1;
}

# Step 1: find broken references
print "Finding broken references. This may take a while...  " if ($fix);

my %broken_ref;

my $doc_fix = 0;

# Step 1a: Sphinx :doc:`...` cross-references inside Documentation/
open(my $doc_in, '-|', 'git', 'grep', ':doc:`', 'Documentation/')
     or die "Failed to run git grep";
while (my $line = <$doc_in>) {
	next if ($line !~ m,^([^:]+):.*\:doc\:\`([^\`]+)\`,);

	# Save the captures before running any other match
	my ($file, $doc_ref) = ($1, $2);

	next if ($line =~ m,sphinx/,);

	my $d = $file;
	my $f = $doc_ref;

	# Directory of the referencing file, with the trailing slash
	$d =~ s,(.*/).*,$1,;
	# On "title <target>" references, keep just the target
	$f =~ s,.*\<([^\>]+)\>,$1,;

	if ($f =~ m,^/,) {
		# Absolute targets are relative to Documentation/
		$f = "$f.rst";
		$f =~ s,^/,Documentation/,;
	} else {
		# Other targets are relative to the referencing file
		$f = "$d$f.rst";
	}

	next if (grep { -e $_ } glob("$f"));

	# Print the "can't fix" notice only once
	if ($fix && !$doc_fix) {
		print STDERR "\nWARNING: Currently, can't fix broken :doc:`` fields\n";
	}
	$doc_fix++;

	print STDERR "$file: :doc:`$doc_ref`\n";
}
close($doc_in);

# Step 1b: plain Documentation/... paths, treewide
open(my $ref_in, '-|', 'git', 'grep', 'Documentation/')
     or die "Failed to run git grep";
while (my $line = <$ref_in>) {
	next if ($line !~ m/^([^:]+):(.*)/);

	my $f = $1;
	my $ln = $2;

	# On linux-next, discard the Next/ directory
	next if ($f =~ m,^Next/,);

	# Makefiles and scripts contain nasty expressions to parse docs
	next if ($f =~ m/Makefile/ || $f =~ m/\.sh$/);

	# Skip this script
	next if ($f eq $scriptname);

	# Ignore the dir where documentation will be built
	next if ($ln =~ m,\b(\S*)Documentation/output,);

	next if ($ln !~ m,\b(\S*)(Documentation/[A-Za-z0-9\_\.\,\~/\*\[\]\?+-]*)(.*),);

	my $prefix = $1;
	my $ref = $2;
	my $extra = $3;

	# some file references are like:
	# /usr/src/linux/Documentation/DMA-{API,mapping}.txt
	# For now, ignore them
	next if ($extra =~ m/^\{/);

	# Remove footnotes at the end like:
	# Documentation/devicetree/dt-object-internal.txt[1]
	$ref =~ s/(txt|rst)\[\d+]$/$1/;

	# Remove ending ']' without any '['
	$ref =~ s/\].*// if ($ref !~ m/\[/);

	# Remove punctuation marks at the end
	$ref =~ s/[\,\.]+$//;

	my $fulref = "$prefix$ref";

	$fulref =~ s/^(\<file|ref)://;
	$fulref =~ s/^[\'\`]+//;
	$fulref =~ s,^\$\(.*\)/,,;

	# Remove URL false-positives
	next if ($fulref =~ m/^http/);

	# Remove sched-pelt false-positive
	next if ($fulref =~ m,^Documentation/scheduler/sched-pelt$,);

	# Discard some build examples from Documentation/target/tcm_mod_builder.rst
	next if ($fulref =~ m,mnt/sdb/lio-core-2.6.git/Documentation/target,);

	# Check if exists, evaluating wildcards
	next if (grep { -e $_ } glob("$ref $fulref"));

	# Accept relative Documentation paths for tools/
	if ($f =~ m/tools/) {
		my $path = $f;
		$path =~ s,(.*)/.*,$1,;
		next if (grep { -e $_ } glob("$path/$ref $path/../$ref $path/$fulref"));
	}

	# Discard known false-positives
	if (defined($false_positives{$f})) {
		next if ($false_positives{$f} eq $fulref);
	}

	if ($fix) {
		if ($ref !~ m/(scripts|Kconfig|Kbuild)/) {
			$broken_ref{$ref}++;
		}
	} elsif ($warn) {
		print STDERR "Warning: $f references a file that doesn't exist: $fulref\n";
	} else {
		print STDERR "$f: $fulref\n";
	}
}
close($ref_in);

exit 0 if (!$fix);

# Step 2: Seek for file name alternatives
print "Auto-fixing broken references. Please double-check the results\n";

# Sorted, so that the report order is reproducible between runs
foreach my $ref (sort keys %broken_ref) {
	my $new = $ref;

	my $basedir = ".";
	# On translations, only seek inside the translations directory
	$basedir = $1 if ($ref =~ m,(Documentation/translations/[^/]+),);

	# get just the basename
	$new =~ s,.*/,,;

	# Candidate replacements found so far
	my @found;

	# usual reason for breakage: DT file moved around
	if ($ref =~ /devicetree/) {
		# usual reason for breakage: DT file renamed to .yaml
		my $new_ref = $ref;
		$new_ref =~ s/\.txt$/.yaml/;
		@found = ($new_ref) if (-f $new_ref);

		if (!@found) {
			my $search = $new;
			$search =~ s,^.*/,,;
			@found = find_files("Documentation/devicetree/", "*$search*") if ($search);
			if (!@found) {
				# Manufacturer name may have changed
				$search =~ s/^.*,//;
				@found = find_files("Documentation/devicetree/", "*$search*") if ($search);
			}
		}
	}

	# usual reason for breakage: file renamed to .rst
	if (!@found) {
		$new =~ s/\.txt$/.rst/;
		@found = find_files($basedir, $new) if ($new);
	}

	# usual reason for breakage: use dash or underline
	if (!@found) {
		$new =~ s/[-_]/[-_]/g;
		@found = find_files($basedir, $new) if ($new);
	}

	# Strip the "./" that find prepends when seeking from "."
	s,^\./,, foreach (@found);

	if (!@found) {
		print STDERR "ERROR: Didn't find a replacement for $ref\n";
	} elsif (scalar(@found) > 1) {
		print STDERR "WARNING: Won't auto-replace, as found multiple files close to $ref:\n";
		foreach my $j (@found) {
			print STDERR "    $j\n";
		}
	} else {
		my $replacement = $found[0];
		print "INFO: Replacing $ref to $replacement\n";
		# -F: look for the reference as a fixed string, not as a regex
		foreach my $j (run_cmd('git', 'grep', '-l', '-F', '-e', $ref)) {
			replace_in_file($j, $ref, $replacement);
		}
	}
}