#!/usr/bin/env perl
# after significant changes, update the tarballs that need it and cp to moo for distribution; update the scut github
#   export filename="/home/hjm/bin/cols"; scp ${filename} moo:~/public_html; scp ${filename} moo:~/bin; scp ${filename} dabrick:~/bin; ssh moo 'scp bin/cols hmangala@hpcs:~/bin'
#   cd ~/gits/scut; cp ~/bin/cols .; git add cols; git commit -m 'commit message'; git push

use Getopt::Long;

#if ( @ARGV == 0 ) { die "\nNo data - check the help (cols --help)\n\n";}

# Command-line settings. They stay package globals (not lexicals) because the
# code that follows in this file reads them by their bare names.
#   $max    --mw     widest a printed column may get
#   $lines  --ml     number of input lines to consume (skipped ones included,
#                    see the "$lines += $skip" below)
#   $ch     --ch     number given to the first column in the header row
#   $help   --help   print the help text and exit
#   $delim  --delim / -d   field separator, a Perl regex
#   $skip   --skip   number of leading input lines to skip
our ($max, $lines, $ch, $help, $delim, $skip);

{
    # Strictures are confined to this bare block so that code further down
    # the file is compiled exactly as it was before.
    use strict;
    use warnings;

    # GetOptions reports the offending option itself on STDERR; stop here
    # instead of carrying on with settings the user did not ask for.
    GetOptions(
        "mw=i"    => \$max,      # max width of cols to print
        "ml=i"    => \$lines,    # max lines to print (~head)
        "ch=i"    => \$ch,       # add a row of col headers, numbered from this value
        "help!"   => \$help,     # print the help
        "delim=s" => \$delim,    # the delimiter to use
        "d=s"     => \$delim,    # ditto
        "skip=i"  => \$skip,     # number of lines to skip before starting to split the fields
    ) or die "$0: bad option(s) - check the help (cols --help)\n";

    # Honour --help no matter where the input comes from (it used to be
    # ignored whenever data was piped in).
    if ($help) {
        usage();
        exit 0;
    }

    $max   = 20 unless defined $max;      # has to be given to get it wider or narrower
    $lines = 10 unless defined $lines;
    $ch    = 0  unless defined $ch;
    $skip  = 0  unless defined $skip;

    # NOTE: the help text advertises TAB as the default delimiter, but the
    # effective default is (and has been) any run of whitespace.
    $delim = "\\s+" unless defined $delim;
    $delim = "\\s+" if $delim eq "ws";     # ws = whitespace
    $delim = "\\t"  if $delim eq "tab";    # translate tab into geek

    for my $check ( [ 'mw', $max ], [ 'ml', $lines ], [ 'skip', $skip ] ) {
        my ($name, $value) = @{$check};
        die "$0: --$name must not be negative\n" if $value < 0;
    }

    # From here on $lines counts every input line consumed, so the skipped
    # lines are included in the limit.
    $lines += $skip;

    # Nothing piped in and no file named on the command line: there is no
    # data to show, so print the short hint and stop.
    if ( (-t STDIN) and not @ARGV ) {
        print "\n$0 aligns and numbers input fields so they can be inspected\nBy default, only prints 10 lines. Use '-h' for more help.\n";
        exit 0;
    }
}
# Main pass: read the input, work out one width per column, then print a
# numbered header row followed by the aligned data rows.
{
    # Strictures are confined to this bare block so that code elsewhere in
    # the file is compiled exactly as it was before.
    use strict;
    use warnings;

    # Settings prepared by the option handling at the top of the file; they
    # live in package main, so they are read by their qualified names.
    my $width_cap  = $main::max;      # widest a printed column may get
    my $line_limit = $main::lines;    # input lines to consume, skipped ones included
    my $label_base = $main::ch;       # number shown above the first column
    my $skip_count = $main::skip;     # leading input lines to drop
    my $delim      = $main::delim;
    my $splitter   = qr/$delim/;      # compile the user's delimiter once

    my @rows;                         # one array ref of fields per kept line
    my $n_cols    = 0;                # widest row seen, in fields
    my $n_seen    = 0;                # input lines consumed so far
    my $truncated = 0;                # true if input remained past the limit

    # Lines starting with '#' are NOT filtered out; the author left that
    # filter disabled.
    LINE: while ( my $line = <> ) {
        if ( $n_seen >= $line_limit ) {
            $truncated = 1;
            last LINE;
        }
        $n_seen++;
        next LINE if $n_seen <= $skip_count;    # --skip: drop the leading lines

        chomp $line;
        my @fields = split $splitter, $line;
        $n_cols = @fields if @fields > $n_cols;
        push @rows, \@fields;
    }

    # Replace empty/missing cells with a dash placeholder (as many dashes as
    # the column index has digits, at most four - it would be silly to view
    # more than 10000 fields using cols) and measure each column. The header
    # label takes part in the measurement so the header lines up with the data.
    my @widths;
    for my $c ( 0 .. $n_cols - 1 ) {
        my $dashes = length $c;
        $dashes = 4 if $dashes > 4;
        my $placeholder = '-' x $dashes;

        my $widest = length( $c + $label_base );
        for my $row (@rows) {
            my $cell = $row->[$c];
            $cell = $placeholder if !defined $cell || $cell eq "";
            $row->[$c] = $cell;
            $widest = length $cell if length($cell) > $widest;
        }
        $widths[$c] = $widest > $width_cap ? $width_cap : $widest;
    }

    # Header row: the column numbers, starting at --ch.
    for my $c ( 0 .. $n_cols - 1 ) {
        printf "%*s ", $widths[$c], $c + $label_base;
    }
    print "\n";

    # Data rows, right-aligned; anything longer than the column is cut off.
    for my $row (@rows) {
        for my $c ( 0 .. $n_cols - 1 ) {
            printf "%*s ", $widths[$c], substr( $row->[$c], 0, $widths[$c] );
        }
        print "\n";
    }

    # reminder - only when input really was left unread
    print "\nRemember: File truncated at line $line_limit.\n" if $truncated;
}

sub usage {
   print << "HELP";
cols is a small Perl-based utility to view columns of data to help
programmers check that the columns correspond to what they want.
It strips tabs from the input and pads columns with spaces so it's NOT
meant to be used as a pipeline processing tool, only as a checking tool.

usage: pipe or redirect X-delimited tabular data (where X=TAB by default,
but can be set to any Perl regex) to 'cols' with the following options:

  --mw=# set max width to this many chars or the max per-col width if smaller. Defaults to 20.
  --ml=# process this many lines of input.
(Defaults to 10) --ch=# add a line of column headers (starting at # - defaults to 0) to the output to tell where you are in very wide output (very useful) --delim=s the delimiter to use to split the fields (Defaults to TAB) Use 'ws' for whitespace (but you can use '\s+' if you want). Since the delim string is passed in verbatim to perl, you can use any perl regex you want to split on. ie --delim='[:\s]+' will break on any number of either ':' or 'whitespaces'. --help dumps this help Pipe output to 'less -S' to view long lines without wrap and arrow keys to scroll around. example: cols --mw=11 --ch=1