#!/usr/bin/env perl

# after significant changes, update the tarballs that need it and cp to moo for distribution; update the scut github
# export filename="/home/hjm/bin/cols"; scp ${filename} moo:~/public_html; scp ${filename} moo:~/bin;  scp ${filename}   dabrick:~/bin; ssh moo 'scp bin/cols hmangala@hpcs:~/bin'

# cd ~/gits/scut; cp ~/bin/cols .; git add cols; git commit -m 'commit message'; git push

use Getopt::Long;
#if ( @ARGV == 0 ) { die "\nNo data - check the help (cols --help)\n\n";}

# Parse the command line.  The option variables are package globals because
# the rest of the script reads them directly.  A mistyped or malformed option
# is fatal: Getopt::Long has already printed its own diagnostic to STDERR by
# the time GetOptions returns false.
GetOptions(
    "mw=i"    => \$max,      # max width of cols to print
    "ml=i"    => \$lines,    # max lines to print (~head)
    "ch=i"    => \$ch,       # number the column-header row starting at this value
    "help!"   => \$help,     # print the help
    "delim=s" => \$delim,    # the delimiter (a Perl regex) to split on
    "d=s"     => \$delim,    # ditto
    "skip=i"  => \$skip,     # extra lines added to the --ml limit (see below)
) or die "\nBad option - check the help (cols --help)\n\n";

# Defaults for anything not given on the command line.
$max   = 20   unless defined $max;      # set --mw to make columns wider or narrower
$lines = 10   unless defined $lines;
$ch    = 0    unless defined $ch;
$skip  = 0    unless defined $skip;
$delim = "ws" unless defined $delim;    # whitespace is the default delimiter

# Translate the friendly delimiter names into regex source text; anything
# else is used verbatim as the split pattern.
my %delim_alias = (
    ws  => '\s+',    # ws  = whitespace
    tab => '\t',     # tab = a literal TAB
);
$delim = $delim_alias{$delim} if exists $delim_alias{$delim};

# NOTE(review): --skip only enlarges the line limit here; nothing in this
# script drops or passes through the first $skip lines.
$lines += $skip;

# An explicit request for help always wins, whether or not data is being
# piped in.  usage() prints the help text and exits.
usage() if $help;

# Nothing to read: STDIN is an interactive terminal and no input files were
# named on the command line (the main loop reads from <>, so file operands
# are a valid source of data).  Print a short hint instead of blocking on
# the keyboard.
if (-t STDIN && !@ARGV) {
    print "\n$0 aligns and numbers input fields so they can be inspected\n"
        . "    By default, only prints 10 lines.  Use '-h' for more help.\n";
    exit 0;
}

# --skip=#  NOTE: currently this only raises the line limit (it is added to
#           --ml during option handling); no lines are actually skipped
#           before the fields are split.

# Read at most $lines lines of input (the limit set during option handling),
# split each one on the delimiter regex, and store the fields as one row of
# the global @bigarr table.  $Nrows counts the rows stored and $maxcols tracks
# the widest row seen; both are read by the formatting code further down.
#
# The row count is tested before the next line is read, so exactly $lines
# rows are kept and no extra line is consumed from the input.
$Nrows   = 0;
$maxcols = 0;
LINE: while ($Nrows < $lines && defined(my $line = <>)) {
    # To ignore '#' comment lines, enable:  next LINE if $line =~ /^#/;
    chomp $line;

    my @fields = split /$delim/, $line;
    my $ncols  = @fields;
    $maxcols = $ncols if $ncols > $maxcols;

    # Short rows simply have fewer entries; the printing code substitutes a
    # placeholder for the missing cells.
    $bigarr[$Nrows] = [@fields];
    $Nrows++;
}

# Work out the display width of every column: the length of its longest
# field, but never narrower than the column's header label (column index plus
# the --ch offset), so that a multi-digit label over short data cannot push
# the following header labels out of line with the data rows.  The label
# allowance is capped at --mw so it never forces a column over the limit.
my @colwidths;

for my $col (0 .. $maxcols - 1) {
    my $widest = 0;

    for my $row (0 .. $Nrows - 1) {
        # Cells missing from short rows are undef and count as width 0.
        my $len = length $bigarr[$row][$col];
        $widest = $len if defined $len && $len > $widest;
    }

    my $label_len = length($col + $ch);
    $label_len = $max if defined $max && $label_len > $max;
    $widest    = $label_len if $label_len > $widest;

    $colwidths[$col] = $widest;
}


# Header row: one right-aligned label per column, numbered from $ch upward,
# padded to the same (possibly --mw-capped) width the data rows use.
# $ch is given a default earlier in the script, so this test normally
# always succeeds.
if (defined $ch) {
    for my $col (0 .. $maxcols - 1) {
        my $width = (defined $max && $colwidths[$col] > $max)
                  ? $max
                  : $colwidths[$col];
        printf "%*s  ", $width, $col + $ch;
    }
    print "\n";
}

# And now print them all again with the right widths.
# Each cell is right-aligned in its column width and followed by two spaces.
# Columns wider than --mw are cut down to --mw characters.
for my $row (0 .. $Nrows - 1) {
    for my $col (0 .. $maxcols - 1) {
        my $cell = $bigarr[$row][$col];

        # Empty or missing cells get a dash placeholder with as many dashes
        # as the column index has digits (at most four) - it would be silly
        # to view more than 10000 fields using cols.
        if (!defined $cell || $cell eq "") {
            $cell = $col <   10 ? "-"
                  : $col <  100 ? "--"
                  : $col < 1000 ? "---"
                  :               "----";
        }

        my $width = $colwidths[$col];
        if (defined $max && $width > $max) {
            $width = $max;
            # Truncate the text that will actually be printed, so an empty
            # cell in a wide column keeps its placeholder.
            $cell = substr($cell, 0, $max);
        }

        printf "%*s  ", $width, $cell;
    }
    print "\n";
}

# Closing reminder of the line limit in force.  It is printed
# unconditionally, whether or not the input had more lines than the limit.
print "\nRemember: File truncated at line " . $lines . ".\n";


# usage - print the help text on STDOUT and exit with status 0.
#
# Takes no arguments and never returns.  The text is a single-quoted heredoc
# so that the backslashes in the regex examples ('\s+', '[:\s]+') are printed
# literally instead of being swallowed by string interpolation.
sub usage {
    print <<'HELP';
    
	cols is a small Perl-based utility to view columns of data to help
programmers check that the columns correspond to what they want.
It strips tabs from the input and pads columns with spaces so it's
NOT meant to be used as a pipeline processing tool, only as a checking
tool.

usage: pipe or redirect X-delimited tabular data (where X=whitespace by
default, but can be set to any Perl regex) to 'cols' with the following options:

--mw=#  set max width to this many chars or the max per-col width if smaller.
        Defaults to 20.

--ml=#  process this many lines of input. (Defaults to 10)

--ch=#  add a line of column headers (starting at # - defaults to 0)
			to the output to tell where you are in very wide output
			(very useful)

--delim=s (or -d s) the delimiter to use to split the fields (Defaults
          to whitespace).  Use 'ws' for whitespace (but you can use '\s+'
          if you want) or 'tab' for TAB.
          Since the delim string is passed in verbatim to perl, you can
          use any perl regex you want to split on. ie  --delim='[:\s]+'
          will break on any number of either ':' or 'whitespaces'.

--skip=# add this many lines to the --ml limit. (Defaults to 0)

--help  dumps this help

Pipe output to 'less -S' to view long lines without wrap and arrow keys to
scroll around.

example:
          cols --mw=11 --ch=1 <huge.data |less -S
	
HELP

    exit 0;
}