#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0
#
# Clean a text file -- or directory of text files -- of stealth whitespace.
# WARNING: this can be a highly destructive operation.  Use with caution.
#
# Usage: cleanfile [-width #] files...
#
# For every regular, non-binary file named on the command line this script
#   * strips trailing spaces, tabs and carriage returns from each line,
#   * rewrites space-before-tab sequences using tabs only,
#   * drops blank lines at the end of the file, and
#   * warns (on STDERR) about lines wider than the -width limit
#     (default 79; a limit of 0 disables the check).
# Files are modified in place, and only when the cleaned content differs.
#

use strict;
use warnings;
use bytes;
use File::Basename;

# Default options
my $max_width = 79;

# Program name, used as a prefix for diagnostics
my $name = basename($0);

# Clean up space-tab sequences, either by removing spaces or
# replacing them with tabs.
#
# Takes one string and returns the cleaned copy.  Tab stops are every
# 8 columns.  Spaces are held back (counted in $nsp) until the next
# character is known: if it is a tab, the spaces are dropped and as many
# tabs as needed to reach the same tab stop are emitted instead; otherwise
# the spaces are emitted unchanged.
sub clean_space_tabs($)
{
    no bytes;			# Tab alignment depends on characters

    my ($li) = @_;
    my $lo  = '';		# Output accumulated so far
    my $pos = 0;		# Column of the output, excluding pending spaces
    my $nsp = 0;		# Number of pending (not yet emitted) spaces

    for my $i (0 .. length($li) - 1) {
	my $c = substr($li, $i, 1);
	if ($c eq "\t") {
	    # Next tab stop after the pending spaces, then the number of
	    # tab stops between the current column and that one.
	    my $npos = ($pos + $nsp + 8) & ~7;
	    my $ntab = ($npos >> 3) - ($pos >> 3);
	    $lo .= "\t" x $ntab;
	    $pos = $npos;
	    $nsp = 0;
	} elsif ($c eq "\n" || $c eq "\r") {
	    # Line terminator: flush pending spaces and restart at column 0
	    $lo .= " " x $nsp;
	    $nsp = 0;
	    $lo .= $c;
	    $pos = 0;
	} elsif ($c eq " ") {
	    $nsp++;
	} else {
	    $lo .= " " x $nsp;
	    $pos += $nsp;
	    $nsp = 0;
	    $lo .= $c;
	    $pos++;
	}
    }
    $lo .= " " x $nsp;
    return $lo;
}

# Compute the visual width of a string
#
# Returns the widest line found in the string, in columns, expanding tabs
# to 8-column tab stops.  Newlines end a line and are not counted; every
# other character counts as one column.
sub strwidth($) {
    no bytes;			# Tab alignment depends on characters

    my ($li) = @_;
    my $pos  = 0;		# Column within the current line
    my $mlen = 0;		# Widest line seen so far

    for my $i (0 .. length($li) - 1) {
	my $c = substr($li, $i, 1);
	if ($c eq "\t") {
	    $pos = ($pos + 8) & ~7;
	} elsif ($c eq "\n") {
	    $mlen = $pos if ($pos > $mlen);
	    $pos = 0;
	} else {
	    $pos++;
	}
    }

    $mlen = $pos if ($pos > $mlen);
    return $mlen;
}

# Clean a single file in place.
#
# Arguments:
#   $f           - path of the file to clean
#   $width_limit - warn about lines wider than this; 0 disables the check
#
# Returns 1 if the file was rewritten, 0 if it was already clean, and
# nothing if the file was skipped (not a regular file, cannot be opened,
# or contains a zero byte).  Dies if the modified content cannot be
# written back, since the file may then be in an inconsistent state.
sub clean_file {
    my ($f, $width_limit) = @_;

    if (! -f $f) {
	print STDERR "$f: not a file\n";
	return;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
	print STDERR "$name: Cannot open file: $f: $!\n";
	return;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my $data;

    # read() returns 0 at end of file and undef on error; stop on either.
    while (read($fh, $data, 65536)) {
	if ($data =~ /\0/) {
	    $is_binary = 1;
	    last;
	}
    }

    if ($is_binary) {
	print STDERR "$name: $f: binary file\n";
	close($fh);
	return;
    }

    seek($fh, 0, 0);

    my @blanks  = ();		# Blank lines not yet known to be kept
    my @lines   = ();		# Cleaned output lines
    my $lineno  = 0;
    my $changed = 0;		# True once the output differs from the input

    while (defined(my $raw = <$fh>)) {
	$lineno++;

	my $line = $raw;
	$line =~ s/[ \t\r]*$//;		# Remove trailing spaces
	$line = clean_space_tabs($line);

	# Compare content, not length: some cleanups (e.g. a space in
	# column 7 followed by a tab becoming two tabs) keep the same
	# number of bytes.
	$changed = 1 if ($line ne $raw);

	if ($line eq "\n") {
	    # Hold blank lines back; they are only kept if a non-blank
	    # line follows.
	    push(@blanks, $line);
	} else {
	    push(@lines, @blanks, $line);
	    @blanks = ();
	}

	my $l_width = strwidth($line);
	if ($width_limit && $l_width > $width_limit) {
	    print STDERR
		"$f:$lineno: line exceeds $width_limit characters ($l_width)\n";
	}
    }

    # Any blanks at the end of the file are discarded
    $changed = 1 if (@blanks);

    if ($changed) {
	# Only write to the file if changed
	if (!seek($fh, 0, 0) || !(print {$fh} @lines)) {
	    die "$name: Failed to write modified file: $f: $!\n";
	}

	my $where = tell($fh);
	if ($where < 0 || !truncate($fh, $where)) {
	    die "$name: Failed to truncate modified file: $f: $!\n";
	}

	# Buffered write errors only surface when the handle is closed
	close($fh)
	    or die "$name: Failed to write modified file: $f: $!\n";
	return 1;
    }

    close($fh);
    return 0;
}

# Parse the command line: "-width N" / "-w N" sets the width limit, any
# other option prints the usage message, everything else is a file name.
my @files = ();

while (defined(my $arg = shift(@ARGV))) {
    if ($arg =~ /^-/) {
	if (($arg eq '-width' || $arg eq '-w') && @ARGV) {
	    $max_width = shift(@ARGV) + 0;
	} else {
	    # Unknown option, or -width without a value
	    print STDERR "Usage: $name [-width #] files...\n";
	    exit 1;
	}
    } else {
	push(@files, $arg);
    }
}

foreach my $f (@files) {
    print STDERR "$name: $f\n";
    clean_file($f, $max_width);
}