#!/usr/bin/env perl
use strict;
use Getopt::Long;      # for std option handling: -h --yadda=badda, etc
use Socket;
use Env qw(HOME PATH);

# TODO
# - when catching up to a previous rsync, should cycle fast until it's all 
#    caught up then drop back to regular periodicity. Maybe, regardless of 
# requested cycle time, it cycles very fast for the 1st few to check that it's caught up?
# - check the section that prints "INFO: Waiting for fpart to get ahead of the
#   transfer" (~line 392); it seems to loop forever in a few cases.
# - use this form for DEBUGs: if ($DEBUG){&debug(__LINE__, "RPMLIST = [$rpmlist]\n")}
# - look into either writing a new script or expanding this one for another use:
#   first-time or one-time data movement, i.e. no rsync capabilities needed.
#   It would use fpart -L -> tar -> pigz -> nc/mbuffer -> network, and then
#   reverse the order to place the data on the remote target.
# - refactor this code - what a mess.

# Package globals used by the main script body and by the subroutines below.
# They are declared with 'use vars' so they are accepted under 'use strict'.
# $parsync_dir and $PIDFILE each appear twice in the list.
use vars qw($allPIDs $ALL_SYS_RSYNC_PIDS $BAREFILES $ch $CHECKPERIOD $cmd 
$crr $CUR_FP_FLE $CUR_FPI $DATE $dcnt $DEBUG @DIRS @DIRS2SYNC $dirtmp 
$EMAIL $Filecnt %FILES $fl $fn $fnd2r $FOUT $FPART_LOGFILE $FPART_PID 
$FPART_RUNNING $FPARTSIZE $FPARTSIZE_N $FP_PIDFILE $FP_ROOT 
$FP_ROOT_DIR $FP_RUNNING $hdr_cnt $hdr_rpt $HELP $IF_SPEED 
$LOAD1mratio $loadavg $logfile $MAXBW $MAXLOAD $nbr_cur_fpc_fles 
$NBR_FP_FLES $NCPUs $NDIRS $NETIF $NOWAIT $NP $NP_chunk $parsync_dir 
$parsync_dir $PARSYNCVER $PIDFILE $PIDFILE $prev_cache $QUIET 
$rem_host $remote $rem_path $rem_user $REUSECACHE $rootdir $rPIDs $sPIDs
$ROOTDIR $RSYNC_CMD $RSYNCOPTS $RSYNCS_GOING $STILLRSYNCS 
@SYSLOAD $TARGET $tmp $Totlsiz %UTILS $VERSION
);

# Version banner: printed as-is for --version and interpolated at the top of
# the help text built in usage().
$PARSYNCVER =  << "VERSION";

parsyncfp version 1.11 (beta)
10-09-2014
by Harry Mangalam <hjmangalam\@gmail.com> || <harry.mangalam\@uci.edu>

parsyncfp is a Perl script that wraps Andrew Tridgell's miraculous
'rsync' to provide some load balancing and parallel operation across
network connections to increase the amount of bandwidth it can use.
The 'fp' variant uses 'fpart' <https://sourceforge.net/projects/fpart/> 
to bypass the need for a full recursive descent of the dir trees before 
the actual transfer starts.

VERSION

# Called with nothing at all on the command line: show the help text.
usage() unless @ARGV;

# Parse the command line.  Each option writes straight into the package
# global named on the right; whatever is left in @ARGV afterwards is the
# list of sources followed by the rsync target.
GetOptions(
  "startdir=s"    => \$ROOTDIR,      # root of all sources -> SRC in rsync
  "rsyncopts=s"   => \$RSYNCOPTS,    # string handed through to rsync
  "NP=i"          => \$NP,           # how many rsync processes to run
  "chunksize=s"   => \$FPARTSIZE,    # fpart chunk size (PpTtGgMmKk suffix allowed)
  "reusecache!"   => \$REUSECACHE,   # re-use existing chunk files, don't re-scan
  "checkperiod=i" => \$CHECKPERIOD,  # seconds between system load checks
  "maxbw=i"       => \$MAXBW,        # total bandwidth cap (becomes rsync --bwlimit)
  "maxload=f"     => \$MAXLOAD,      # suspend rsyncs when the load exceeds this
  "email=s"       => \$EMAIL,        # who to mail when everything is finished
  "interface=s"   => \$NETIF,        # network interface to monitor
  "nowait!"       => \$NOWAIT,       # sleep briefly instead of waiting for ENTER
  "help!"         => \$HELP,         # show usage and tips
  "version!"      => \$VERSION,      # print the version banner
  "debug!"        => \$DEBUG,        # extra developer-level output
);

if (! defined $QUIET) {$QUIET = 0;}

## Set up run-permanent variables.
# Timestamp (HH.MM.SS_YYYY-MM-DD) used to make this run's file names unique.
$DATE=`date +"%T_%F" | sed 's/:/./g' `; chomp $DATE;
$parsync_dir = $HOME . "/.parsync";

if (defined $VERSION) { print $PARSYNCVER; exit;}
if (!defined $RSYNCOPTS) {$RSYNCOPTS = "";}
if (defined $HELP) {usage();}

check_utils(); # check that the required utilities are on the system

### get the current system stats:  #CPUs, load, bandwidth, etc
# CPUs
$NCPUs = `cat /proc/cpuinfo | grep processor | wc -l`; chomp $NCPUs;
# If the count is unusable (0 or not a number) fall back to 1 so that the
# load ratio and the default NP below never divide by zero.
if ($NCPUs !~ /^\s*\d+\s*$/ || $NCPUs < 1) {
  print "!!WARN: Couldn't determine the number of CPUs from /proc/cpuinfo; assuming 1.\n";
  $NCPUs = 1;
}
$loadavg = `cat /proc/loadavg | tr -d '\n'`;
@SYSLOAD = split (/\s+/, $loadavg); # 1st 3 fields are 1, 5, 15m loads
# so as long as the 1m load / NCPUs < 1, we're fine; if > 1, we may want to start throttling..
$LOAD1mratio = $SYSLOAD[0] / $NCPUs;

if (! defined $NETIF) {$NETIF = "eth0";}
if (! defined $CHECKPERIOD || $CHECKPERIOD < 5) {$CHECKPERIOD = 5;} # really 10s due to the 5s of network sampling
elsif ($CHECKPERIOD >= 5) {$CHECKPERIOD -= 5;}
if (! defined $NP){$NP = int(sqrt($NCPUs)+ 0.5);} # round sqrt(NCPUs) (hyperthreaded if Intel) 8 -> 3
# NP is used as a divisor below and as the number of rsyncs to keep running.
if ($NP < 1) {die "\nFATAL: --NP must be at least 1 (got [$NP]).\n";}
if (! defined $MAXBW) {$MAXBW = 1000000;} # essentially unlimited
elsif ($MAXBW > 0) {
  # users expect total maxbw; so have to divide by NP.
  $MAXBW = int($MAXBW / $NP + 0.5);
  # rsync reads --bwlimit=0 as "no limit", so never let a small cap round to 0.
  if ($MAXBW < 1) {$MAXBW = 1;}
}
if (! defined $MAXLOAD) {$MAXLOAD = $NP + 2 ;} #  + 1 for IO load
if (! defined $ROOTDIR) {$ROOTDIR = `pwd`; chomp $ROOTDIR;}  # where all dirs must be rooted.
# Default chunk size is 10 GiB, matching the "10G" label and the help text
# (the previous literal, 104857600, was only 100 MiB).
if (! defined $FPARTSIZE) {$FPARTSIZE = "10G"; $FPARTSIZE_N = 10 * 1024 * 1024 * 1024;}
elsif ($FPARTSIZE =~ /[PpTtGgMmKk]/) {$FPARTSIZE_N = ptgmk($FPARTSIZE); }
else {$FPARTSIZE_N = $FPARTSIZE;}
if ($DEBUG) {&debug(__LINE__, "FPARTSIZE = $FPARTSIZE\nFPARTSIZE_N = $FPARTSIZE_N");}

# get some network info
# Query the interface that was actually requested with --interface (the
# commands used to be hard-wired to eth0 / wlan0 whatever $NETIF said).
if ($NETIF =~ /eth/) {
  $IF_SPEED = `ethtool $NETIF 2> /dev/null | grep Speed | cut -f2 -d:`;
} elsif ($NETIF =~ /wlan/) {
  $IF_SPEED = `iwconfig $NETIF | grep -i quality`;
} elsif ($NETIF =~ /ib/) {
  $IF_SPEED = `ibstat | grep Rate | head -1 | sed -e 's/^[ \t]*//'`;
  $IF_SPEED = "IB:" . $IF_SPEED;
}
# Interface names matching none of the patterns above (or tools that print
# nothing) leave the speed unknown rather than undefined.
if (! defined $IF_SPEED) {$IF_SPEED = "";}
chomp $IF_SPEED;
if ($IF_SPEED eq "") {$IF_SPEED = "unknown";}

if ($DEBUG){
  print "\tEVAL: Using network interface [$NETIF] with connection quality [$IF_SPEED]\n\n";
}

# Refuse to start silently on an already overloaded machine.
if ($SYSLOAD[0] < $MAXLOAD){
  if ($DEBUG){
    print "\n\tEVAL: 1m load is [$SYSLOAD[0]] and the 1m Load:#CPU ratio is [$LOAD1mratio] ( [$NCPUs] CPU cores).\n\t    OK to continue.\n "
  }
} else {
  print "\n!!WARN: 1m System load [$SYSLOAD[0]] is >= MAXLOAD [$MAXLOAD].  The 1m Load:#CPU ratio is [$LOAD1mratio].\n Continue? [Cntrl+C to interrupt; Enter to continue]\n ";
  # Honour --nowait here as everywhere else, so scripted runs don't block.
  if ($NOWAIT){ sleep 5;}
  else {pause();}
}

# Make sure the per-user cache/log dir exists.  A fresh dir is created
# silently; an existing one is listed so the user can decide whether stale
# cache/log files need attention before the run goes on.
if (! -d $parsync_dir) {
  mkdir($parsync_dir) or die "FATAL: Can't mkdir [$parsync_dir]\n";
} else {
  my $listing = `ls -l $parsync_dir`;
  print <<LS;

WARN: The parsync cache dir [$parsync_dir]
already exists and may contains old cache and log files.
The complete list of the dir is:
--------------------------------------------------------------------
$listing
--------------------------------------------------------------------
If you want to copy, clear or modify the cache files, please [Ctrl+C],
process the files and start again.
If you specified the '--nowait' option, I'll continue in 5s.
LS
  # --nowait: give a short grace period; otherwise wait for ENTER.
  if ($NOWAIT) {
    sleep 5;
  } else {
    pause();
  }
}

# File and directory names used for this run, all under the fpart cache dir.
$FP_ROOT_DIR = $parsync_dir . "/fpcache";
if (! -d $FP_ROOT_DIR) {mkdir $FP_ROOT_DIR or die "FATAL: Can't make 'FP_ROOT_DIR' [$FP_ROOT_DIR]\n";}
$FP_ROOT = $FP_ROOT_DIR . "/fpc";  # the root name of the fpart chunk files fpc.1, etc
$PIDFILE = $FP_ROOT_DIR . '/' . "rsync-PIDs" . '-' . $DATE;
$FPART_LOGFILE = $FP_ROOT_DIR . '/' . "fpart.log." . $DATE;
$FP_PIDFILE = $FP_ROOT_DIR . '/' . "FP_PIDFILE" . $DATE;
$hdr_rpt = 20; # nbr of lines to repeat the header
$hdr_cnt = 21;  # header counter; > $hdr_rpt so it gets printed 1st time

# The last word on the command line is the rsync target; everything before it
# (after option parsing) names the dirs & files to transfer.
$TARGET = pop @ARGV; # remote rsync target
if (!defined $TARGET ){die "\n\nXX FATAL XX: No target defined! Where you gonna put this stuff??!?\nTry $0 --help for the built-in help.\n"}

# now process the dirs
$dcnt = 0;
$fnd2r = "";  # zero the list of 'files 'n' dirs to rsync'
my @sources = @ARGV;  # should only be dir/files left once getopt finishes (see above)
@ARGV = ();

# Every named source is taken relative to the start dir.  If none were named,
# transfer the start dir itself (the old code appended an un-chomped `pwd` to
# the start dir, which never produced a readable path).
my @source_paths = @sources ? map { $ROOTDIR . '/' . $_ } @sources : ($ROOTDIR);

foreach my $path (@source_paths) { # works on explicitly named dirs as well as shell-expanded globs.
  if (! -r $path){ # quick check to see if its readable.
    print "WARN: [$path] isn't readable; either it's not where you think it\nis or you need to escalate your privs.  Regardless, it won't be transferred in this run.\n";
  } else {  # otherwise, add the file to list to be chunked and  transferred.
    # $fnd2r is pasted into a shell command line later, so single-quote each
    # path to keep names with spaces or shell metacharacters intact.
    (my $quoted = $path) =~ s/'/'\\''/g;
    $fnd2r .= "'" . $quoted . "' ";
  }
}
# Without a readable source fpart has nothing to chunk; only a re-used cache
# can still make the run meaningful.
if ($fnd2r eq "" && ! $REUSECACHE) {
  die "\nFATAL: None of the requested sources under [$ROOTDIR] are readable; nothing to transfer.\n";
}
my @cachefiles = (); # will populate with list of cachefiles to process together.

### TODO REUSECACHE
# Look for chunk files left by an earlier run.  --reusecache only makes sense
# if there are some; the option is tested for truth (not definedness) so that
# an explicit --noreusecache really means "don't re-use".
my $rsls = `ls -1 $FP_ROOT_DIR`;
#print "DEBUG: rsls = $rsls\n";
my $glob =  $FP_ROOT_DIR . "/fpc.*";
if ($rsls =~ 'fpc') {$prev_cache = `ls -1 $glob`; }
elsif ($REUSECACHE){
  print "!!WARN: You chose '--reusecache', but there's no files for it. Unsetting that option\n\n.";
  undef $REUSECACHE; sleep 1;
}

## This is the big REUSECACHE SECTION.  ONlY enter if want to REUSECACHE
if ($REUSECACHE && -d $FP_ROOT_DIR){
  print "!!WARN: NOT GENERATING NEW CACHE; RE-USING ALL OF PREVIOUS CACHE.
This includes the following cache files from [$FP_ROOT_DIR]:
--------------------------------------------------------------------
$prev_cache
--------------------------------------------------------------------
If you want to ignore some of these cachefiles, delete them or move them out of the way.
Hit [CTRL + C] to cancel or .. ";
  if ($NOWAIT){
    print " Actually... Not waiting.  You have 5 sec to cancel.\n";
    sleep 5;
  } else{ pause(); }

  # now have to populate the @cachefiles array from the existing cachefiles
  # this ends up with an array of cache files, but since they're being used as-is
  # we really don't need to process the list - just use as-is
  @cachefiles = split(/\n/,$prev_cache);
} else {  # starting the fpart partitioner
  # have to unlink all the old cache files first to prevent overlaps
  print "WARN: about to remove all the old chunkfiles you didn't want.
  You've got 5s to ^C to stop me!\nOtherwise, they're gone\n";
  sleep 5;
  unlink glob "$glob"; 
  print "WARN: OK, they're gone.. continuing\n";
  
  # make sure there's no other fpart's running.
  my $fparts_already_running = `ps aux | grep 'fpar[t]'`; chomp $fparts_already_running;
  if ($fparts_already_running ne ''){
    print "WARN: one or more 'fpart's are already running:
    ====
    [$fparts_already_running]
    ====
    Unless you know that these fparts are valid and not left over from previous 
    parsyncfp's, you should exit and kill them off before restarting this run.
    Continue? [Ny] ";
    my $ansr = <STDIN>;
    if (defined $ansr && $ansr =~ /[yY]/) { print "\nOK, continuing\n"; }
    else {die "Fine - clear up the confusion and try again.\n"}
  }

  # The shell backgrounds fpart itself ('&') and records its PID, so no Perl
  # fork is needed.  (The old fork treated a failed fork as "I am the child"
  # and exited the whole script, and left an unreaped child otherwise.)
  my $cmd = "fpart -v -L -s $FPARTSIZE_N -o $FP_ROOT $fnd2r  2> $FPART_LOGFILE & echo \"\${!}\" > $FP_PIDFILE";
  # That captures the child PID!
  if ($DEBUG) {print "DEBUG: fpart fork cmd:\n[$cmd]\n";}
  print "\nINFO: Forking fpart..\nCheck [$FPART_LOGFILE] for errors if it hangs.\n";
  system($cmd) == 0
    or die "\nFATAL: Couldn't start fpart; check [$FPART_LOGFILE].\n";

  # fpart has been started; pick up its PID from the file the shell wrote.
  while (! -e $FP_PIDFILE) { sleep 1;
    #print "INFO: Waiting for fpart to be forked..\n";
  }
  $FPART_PID = `cat $FP_PIDFILE`; chomp $FPART_PID;
  if ($FPART_PID !~ /^\d+$/) {
    die "\nFATAL: Couldn't read the fpart PID from [$FP_PIDFILE].\n";
  }

  # wait for the first chunkfile to be written before starting the rsyncs
  my $ready2start = my $waitcnt = $NBR_FP_FLES = 0;
  my $fp0 = $FP_ROOT . ".0";
  while ($ready2start == 0) {
    if (-e $fp0) { print "INFO: [$fp0] visible.\n"; $NBR_FP_FLES++; $ready2start=1; }
    # If fpart has already gone away and still left no chunk file, none will
    # ever appear: stop instead of waiting forever.  The file test is repeated
    # to cover fpart finishing between the two checks.
    elsif (! kill(0, $FPART_PID) && ! -e $fp0) {
      die "\nFATAL: fpart exited without writing any chunk file; check [$FPART_LOGFILE].\n";
    }
    $waitcnt++;
    print "INFO: Waiting [$waitcnt] s for chunk files to be written\r";
    sleep 1;
  }
} # starting the fpart partitioner

# Launch the first batch: one rsync per available chunk file, up to NP of
# them.  The main loop further down keeps topping the number back up until
# every chunk file has been handed to an rsync.

my $STILL_FP_CHUNKS = 1;
my $KEEPGOING = 1;
my $FPCFS = $FP_ROOT . '.'; # FP Chunk File Stem
my $NBR_FP_FLES = `ls -1 $FPCFS* | wc -l`;
chomp $NBR_FP_FLES;
$CUR_FPI = 0;       # index of the chunk file to use next
$RSYNCS_GOING = 0;  # rsyncs launched so far in this first batch

print "\nINFO: Starting the 1st [$NP] rsyncs ..\n";
my $sc = 0;  # seconds spent waiting for a chunk file (display only)
while ($KEEPGOING && $RSYNCS_GOING < $NP) {
  $CUR_FP_FLE = "$FP_ROOT.$CUR_FPI";

  if (! -e $CUR_FP_FLE) {
    # No chunk file with this index yet: if fpart has gone there never will
    # be one, otherwise give it a moment and look again.
    $FPART_RUNNING = `ps aux | grep fpar[t] | grep $FPART_PID | wc -l`;
    chomp $FPART_RUNNING;
    if ($FPART_RUNNING eq '0') {
      $KEEPGOING = 0;  # nothing more to start; leave the loop
    } else {
      print "INFO: waiting [$sc]s for next chunkfile [$CUR_FP_FLE]\r";
      sleep 2;
      $sc += 2;
    }
    next;
  }

  # The chunk file is there: tidy its file names, then start an rsync on it.
  fixfilenames($CUR_FP_FLE, $ROOTDIR);
  # One rsync log per chunk file; they can be deleted once the run is verified.
  $logfile = $parsync_dir . '/' . "rsync-logfile-" . $DATE . "_" . $CUR_FPI;
  # rsync is backgrounded by the shell, which appends its PID to the PID file.
  $RSYNC_CMD = "rsync  --bwlimit=$MAXBW  $RSYNCOPTS -a --log-file=$logfile --files-from=$CUR_FP_FLE  $ROOTDIR  $TARGET  & echo \"\${!}\" >> $PIDFILE";
  system("$RSYNC_CMD");
  $CUR_FPI++;
  $RSYNCS_GOING++;
}
print "\n\n";

# so at this point either we've loaded all the rsyncs up to NP or we've completely finished.
# If the latter, say good bye.  If the former, then we have to keep launching
# rsyncs up to NP until we've used up all the fpart chunkfiles.

$sPIDs = ""; # PIDs of rsyncs suspended by parsync (space-separated string)
$NBR_FP_FLES = `ls -1 $FPCFS* | wc -l`; chomp $NBR_FP_FLES; # get current # of chunks
my @aprPIDs; # all recorded parsync rsync PIDs ever started
my @crrPIDs; # currently RUNNING parsync rsync PIDs.
my @csrPIDs; #currently SUSPENDED parsync rsync PIDs.

# Returns 1 while the fpart started by this run is still alive, otherwise 0.
# With --reusecache no fpart was started, so it is never considered running.
my $fpart_is_running = sub {
  return 0 if $REUSECACHE;
  my $hits = `ps aux | grep 'fpar[t]' | grep -w $FPART_PID | wc -l`; chomp $hits;
  return ($hits =~ /^\s*[1-9]/) ? 1 : 0;
};

### FOLLOWING IS THE MAIN PARSYNC-FPART LOOP
# Each pass: report load/bandwidth/PIDs, then do exactly one of
#   - suspend one rsync (load too high),
#   - resume one suspended rsync (load OK again),
#   - start rsyncs on new chunk files (fewer than NP running).
# The loop ends once every chunk has been handed out, fpart has finished and
# no rsync is left running or suspended.

$FP_RUNNING = $fpart_is_running->();

while ($CUR_FPI < $NBR_FP_FLES || $FP_RUNNING || $STILLRSYNCS ) {
  $rPIDs = "";
  # print the header
  if ($hdr_cnt++ > $hdr_rpt) {
    my $glob = $FP_ROOT . "*"; $hdr_cnt = 0;
    $nbr_cur_fpc_fles = `ls -1 $glob | wc -l`; chomp $nbr_cur_fpc_fles;
    print "     Timestamp       |  1m Load  | BW [$NETIF]   |   Running PIDs   ||   Suspended PIDs  || Chunk [$CUR_FPI] of [$nbr_cur_fpc_fles]\n";
  }
  
  ($rPIDs, $crr) = get_rPIDs($PIDFILE, $sPIDs);

  # now get load, bw, etc, and start rsyncs on new chunkfiles or suspend them to
  # load-balance

  $loadavg = `cat /proc/loadavg | tr -d '\n'`; # What's the system load?
  @SYSLOAD = split (/\s+/, $loadavg); # 1st 3 fields are 1, 5, 15m loads
  $LOAD1mratio = $SYSLOAD[0] / $NCPUs;
  my $meanbw = "0";
  # What's the 5s mean BW? (contributes 5s to periodicity of updates)
  # TODO: this can be improved by sampling in the background. (fork ifstat to record
  # continuously and just sample the last X samples.
  $meanbw = `ifstat -i $NETIF 1 5 | tail -5 | cut -c9-19 | stats --quiet | grep Mean | cut -c 7-19`;
  chomp $meanbw;

  # print out current data with the date
  $rPIDs =~ s/^\s+|\s+$//g ; $sPIDs =~ s/^\s+|\s+$//g ;  # trim leading & trailing whitespace
  if ($rPIDs eq "" ){$rPIDs = "No current PIDs (used all chunk files); start more next cycle"}
  my $rDATE=`date +"%T_%F" | sed 's/:/./g' `; chomp $rDATE;
  printf "$rDATE     %5.2f   %12.2f     [%s]   ||    [%s]\n",
             $SYSLOAD[0],   $meanbw ,   $rPIDs,  $sPIDs;

### SUSPEND OR CONTINUE RSYNCS for LOADBALANCING
  if ($SYSLOAD[0] > $MAXLOAD) {
    # suspend a PID; then loop as normal. If still high, will continue to
    # suspend PIDs until there's none left.
    if ($DEBUG) {print "\nDEBUG: System load [$SYSLOAD[0]] is > MAXLOAD [$MAXLOAD].  Will try to suspend a running rsync to shed load.\n";}
    # Refresh $rPIDs so it holds only PIDs that still exist and are not
    # already suspended.  'ps -o pid=' prints bare PIDs (no header, no
    # fixed-width column to truncate), and PIDs are compared as whole
    # tokens so that e.g. 123 can never clip 1234.
    if ($rPIDs =~ /\d+/) {
      my $pidlist = join(',', grep { /^\d+$/ } split(' ', $rPIDs));
      my %is_suspended = map { $_ => 1 } grep { /^\d+$/ } split(' ', $sPIDs);
      my @alive = grep { /^\d+$/ && !$is_suspended{$_} } split(' ', `ps -o pid= -p $pidlist`);
      $rPIDs = join(' ', @alive);
    }
    if ($rPIDs =~ /\d+/){ # if any still left
      my ($victim, @others) = split(' ', $rPIDs);
      if ($DEBUG) {print "\t\tDEBUG:got one: [$victim]; will now suspend it\n";}
      kill 'STOP', $victim;
      $sPIDs = ($sPIDs =~ /\d/) ? "$sPIDs $victim" : "$victim"; # transfer rPID to sPID.
      $rPIDs = join(' ', @others);
    } else { # there aren't any more PIDs left - all done or killed off.'
      print "\tINFO: No more running rsync PIDs left [$rPIDs].  All rsyncs are suspended [$sPIDs].\n";
    }
  } elsif ($sPIDs =~ /\d+/) { # if there are sPIDs, unsuspend them one by one
    my @suspended = grep { /^\d+$/ } split(' ', $sPIDs);
    my $wake = shift @suspended;
    if (defined $wake) {
      if ($DEBUG) { print "\t\tDEBUG:got one: [$wake]; will now UNsuspend it\n";}
      kill 'CONT', $wake;
      $rPIDs = "$rPIDs" . ' ' . "$wake"; # transfer sPID to rPID.
    }
    $sPIDs = join(' ', @suspended); # and drop exactly that PID from the sPID string
  } # end of 'SUSPEND OR CONTINUE to LOADBALANCE.' test loop
  # and if neither of those conditions are met, then we can launch another rsync.
  elsif  ($crr < $NP) { # then launch another rsync with the next fpart chunkfile
    $CUR_FP_FLE = $FP_ROOT . "." . $CUR_FPI ; # generate the next fpart chunk file with $CUR_FPI
    # if fpart is still going, wait for the next chunkfile to show up.
    # fpart's state is re-read on every pass so the wait ends when it exits.
    my $cfw = 0;
    $FPART_RUNNING = $fpart_is_running->();
    while (! -e $CUR_FP_FLE && $FPART_RUNNING){
      print "INFO: Waiting [$cfw] s for next chunkfile..\r"; sleep 2; $cfw += 2;
      $FPART_RUNNING = $fpart_is_running->();
    }
    
    ($rPIDs, $crr) = get_rPIDs($PIDFILE, $sPIDs);
    # split(' ') ignores leading blanks, so stray whitespace is not counted as a PID
    my $n = my @a = split(' ', $rPIDs);
    my $R2SU = $NP - $n; # this is the number of rsyncs to start up
    my $glob = $FP_ROOT . "*";
    my $nbr_cur_fpc_fles = `ls -1 $glob | wc -l`; chomp $nbr_cur_fpc_fles;
    for (my $i=0; $i<$R2SU; $i++) {
      # Wait for fpart to produce more chunk files, but only while OUR fpart
      # is really still alive (checked on every pass).
      while  (($CUR_FPI >= $nbr_cur_fpc_fles) && $fpart_is_running->()) {
	if ($DEBUG) {print "DEBUG[l398]: CUR_FPI=$CUR_FPI > nbr_cur_fpc_fles=$nbr_cur_fpc_fles?\n";}
	print "INFO: Waiting for fpart to get ahead of the transfer\n"; 
	sleep 2;
	$nbr_cur_fpc_fles = `ls -1 $glob | wc -l`; chomp $nbr_cur_fpc_fles;
      }
      # Recompute the chunk file name for every rsync started in this pass;
      # otherwise the 2nd rsync would re-send the previous chunk and the
      # chunk it should have taken would be skipped.
      $CUR_FP_FLE = $FP_ROOT . "." . $CUR_FPI;
      $logfile = $parsync_dir . '/' ."rsync-logfile-" . $DATE . "_" . $CUR_FPI;
      $nbr_cur_fpc_fles = `ls -1 $glob | wc -l`; chomp $nbr_cur_fpc_fles;
      $RSYNC_CMD = "rsync  --bwlimit=$MAXBW -a --log-file=$logfile $RSYNCOPTS  --files-from=$CUR_FP_FLE  $ROOTDIR  $TARGET & echo \"\${!}\" >> $PIDFILE";
      if ($DEBUG) {print "\nDEBUG: Starting [$RSYNC_CMD]\n"; }
      if (-e $CUR_FP_FLE) {
	fixfilenames($CUR_FP_FLE, $ROOTDIR);  # check & fix for spaces, bad chars.
	print "INFO: Starting another rsync with chunk file [", ($CUR_FPI+1), "] of [$nbr_cur_fpc_fles]\n";
	system("$RSYNC_CMD"); # capture the bg job PID to PIDfile
	$CUR_FPI++;
      } else {
	last; # no further chunk file available right now
      }
    }
    ($rPIDs, $crr) = get_rPIDs($PIDFILE, $sPIDs);
  }
  sleep $CHECKPERIOD;
  $NBR_FP_FLES = `ls -1 $FPCFS* | wc -l`; chomp $NBR_FP_FLES; # get current # of chunks
  # Re-read fpart's state every pass; a value captured once before the loop
  # would keep the loop alive forever.
  $FP_RUNNING = $fpart_is_running->();
  # Suspended rsyncs still have work to do, so they keep the loop alive too;
  # otherwise they could be left stopped when the script ends.
  if ($rPIDs =~ /\d+/ || $sPIDs =~ /\d+/) {$STILLRSYNCS = 1;}
  else {$STILLRSYNCS = 0;}
}  # while ($CUR_FPI < $NBR_FP_FLES )

# All rsyncs have finished: optionally mail a completion note, then remind
# the user about the cache dir.
my $host = `hostname`; chomp $host;  # no newline inside the mail subject
if (defined $EMAIL){system("echo 'all rsyncs done' | mail -s 'parsyncfp on host [$host] completed' $EMAIL");}

# remind user how much storage the cache takes and to clear the cache files 
# 'du -sh' prints "<size><TAB><path>"; keep only the size for the message.
my $du_cache = `du -sh $parsync_dir`;
$du_cache = (split(' ', $du_cache))[0];
if (! defined $du_cache) {$du_cache = "unknown";}

print "WARN: The parsync cache dir [$parsync_dir] takes up [$du_cache]
Don't forget to delete it, but wait until you are sure that your job
completed correctly, so that you can re-use it if necessary.\n";
exit;


# ================= subroutines =================


# check_utils: make sure every external program this version of parsync
# shells out to can be found with 'which'.  Dies, after printing a hint about
# where to get the tools, at the first one that is missing.  With $DEBUG set,
# reports where each tool was found.
sub check_utils {
  my @required = qw(ethtool iwconfig ifstat stats fpart scut);
  my %found_at;  # tool name => path reported by 'which'

  for my $tool (@required) {
    my $where = `which $tool | tr -d '\n'`;
    if ($where =~ /$tool/) {
      $found_at{$tool} = $where;
      print "\tEVAL: Found [$tool] at [$where].\n" if $DEBUG;
      next;
    }
    print "!!WARN: [$tool] not found.  you can find 'stats', scut, and 'fpart' here:
	<http://moo.nac.uci.edu/~hjm/parsync/utils>
      and the rest via yum, apt-get, or google.\n";
    die "\n\nFATAL: [$tool] isn't on your PATH [$PATH]; Please install it or correct your PATH variable to include it.\nTry ''module load perl'' or use cpan to install it.\n\n";
  }
}


# usage:  ($rPIDs, $crr) = get_rPIDs($PIDFILE, $sPIDs);
#
# Works out which of the rsyncs started by this parsync are still alive.
#
# Inputs:
#   $pidfile - name of the file holding one rsync PID per line, as appended
#              by the shell each time an rsync is launched
#   $spids   - space-separated string of PIDs that parsync has suspended
# Returns a two-element list:
#   - a space-separated string of our rsync PIDs that are alive and NOT in
#     $spids (empty string if there are none)
#   - the number of our rsync PIDs that are alive, suspended ones included
# Dies if the PID file cannot be opened.
#
# Other rsyncs may be running on the system; only PIDs that appear both in
# the system-wide rsync list and in the PID file count as ours.
sub get_rPIDs($$) {
  my $pidfile = shift; # string name of PIDFILE
  my $spids = shift;   # suspended PIDs in a string.
  $spids = "" unless defined $spids;

  # PIDs of every rsync on the system, in ascending order.
  my $ALL_SYS_RSYNC_PIDS = `ps aux | grep rsyn[c] | scut -f=1 | sort -g | tr '\n' ' '`;
  my @system_rsyncs = grep { /^\d+$/ } split(' ', $ALL_SYS_RSYNC_PIDS);

  # PIDs of every rsync this parsync has ever launched.
  open(my $pid_fh, '<', $pidfile) or die "\nFATAL: Can't open PIDFILE [$pidfile]'.\n";
  my %launched_by_us;
  while (my $line = <$pid_fh>) {
    $line =~ s/^\s+|\s+$//g;
    $launched_by_us{$line} = 1 if $line =~ /^\d+$/;
  }
  close $pid_fh;

  # Ours AND still present in the process table.
  my @alive = grep { $launched_by_us{$_} } @system_rsyncs;

  # Mask out the suspended ones.  PIDs are compared as whole tokens, so
  # suspending 123 can no longer clip 1234 or leave stray blanks behind.
  my %is_suspended = map { $_ => 1 } grep { /^\d+$/ } split(' ', $spids);
  my $rpids = join(" ", grep { !$is_suspended{$_} } @alive);

  # The count is the real number of live rsyncs (it used to be one less,
  # i.e. -1 when none were running).
  return ($rpids, scalar @alive);
}


sub pause {
    # Prompt, then block until a line (or end-of-file) arrives on STDIN.
    # The line that was read is handed back to the caller.
    print "Press [ENTER] to continue.\n";
    return scalar <STDIN>;
}

# debug(__LINE__, "string"): writes the caller's line number and the message
# to STDERR, then waits for the user to press ENTER before going on.
sub debug($$) {
	my ($where, $text) = @_;
	print STDERR "DEBUG[$where]: $text\n";
	return pause();
}

# fixfilenames rewrites an fpart chunk file in place so that every file name
# in it is relative to the start dir, which is the form the rsync commands in
# this script use (--files-from=<chunk file> with the start dir as source).
#
# Inputs:
#   $FN       - path of the chunk file (one file name per line)
#   $startdir - the start dir; only a LEADING "<startdir>/" is removed
# The names are otherwise written back untouched: rsync reads --files-from
# entries literally, so spaces must not be escaped.
# Dies if the chunk file can't be read or its replacement can't be written
# or renamed into place.
sub fixfilenames {
  my $FN = shift;
  my $startdir = shift;
  my $fpnew =  $FN . ".new";

  # Match the start dir literally (\Q..\E) and only as a prefix.  The old
  # unanchored, unescaped s///g also ate later occurrences of the start dir
  # inside a path and treated characters like '.' or '+' as regex operators.
  (my $root = $startdir) =~ s{/+$}{};
  my $prefix = qr{^\Q$root\E/+};

  open(my $in, '<', $FN) or die "ERROR: Can't open fp file [$FN]\n.";
  open(my $out, '>', $fpnew) or die "ERROR: Can't open replacement file [$fpnew]\n.";
  while (my $name = <$in>) {
    chomp $name;
    $name =~ s/$prefix//;
    print {$out} "$name\n";
  }
  close $in;
  close $out or die "ERROR: Can't finish writing replacement file [$fpnew]\n.";
  # and then rename the new one to the original
  rename($fpnew, $FN) or die "ERROR: Can't rename [$fpnew] to [$FN]\n.";
}


# ptgmk converts a size suffixed with one of [PpTtGgMmKk] to bytes, using
# 1024 bytes/KB (not 1000).  Surrounding whitespace is ignored.
#   ptgmk("10G") -> 10737418240
# Every multiplier is an exact power of 1024 (T and P used to be rounded
# floating-point constants, which gave slightly wrong byte counts).
# Dies if the last character is not a known suffix or if what precedes it is
# not a plain non-negative number.
sub ptgmk {
  my $instr = shift;
  $instr = "" unless defined $instr;
  # trim spaces from back and front
  $instr =~ s/^\s+|\s+$//g;
  my $abbr = chop $instr;
  (my $nbr = $instr) =~ s/\s+$//;

  my %bytes_per = (
    K => 1024,
    M => 1024 ** 2,
    G => 1024 ** 3,
    T => 1024 ** 4,
    P => 1024 ** 5,
  );
  my $multiplier = $bytes_per{uc $abbr};
  if (! defined $multiplier) {
    die "\n\nFATAL: ptgmk() input doesn't end in one of [PpTtGgMmKk], so nothing to convert.\n\n";
  }
  if ($nbr !~ /^(?:\d+\.?\d*|\.\d+)$/) {
    die "\n\nFATAL: ptgmk() can't read a number from [$nbr$abbr].\n\n";
  }
  return $nbr * $multiplier;
}

# usage: writes the help text to a temporary file under ~/.parsync, shows it
# with 'less -S' (or prints it directly if less can't be run), removes the
# file and then dies, so it never returns to the caller.
#
# The cache dir name is built here from $HOME because usage() can be called
# before the main script has set $parsync_dir (when there are no arguments
# at all); the help text used to show empty brackets in that case.
sub usage {
  my $cache_dir = "$HOME/.parsync";
  my $helpfile = "$cache_dir/parsync-help.tmp";
  if (! -d $cache_dir) {mkdir $cache_dir;}
  open(my $hlp, '>', $helpfile) or die "Can't open the temp help file [$helpfile]\n";
  my $helptxt = <<HELP;

$PARSYNCVER

The only native rsync option that parsync uses is '-a (archive).  If you
need more, then it's up to you to provide them via '--rsyncopts'.
parsync checks to see if the current system load is too heavy and tries
to throttle the rsyncs during the run by monitoring and suspending
/ continuing them as needed.

parsync uses fpart <http://goo.gl/K1WwtD> to create chunkfiles for rsync
to read, bypassing the need to wait for a complete recursive scan.

It appropriates rsync's bandwidth throttle mechanism, using '--maxbw'
as a passthru to rsync's 'bwlimit' option, but divides it by NP so
as to keep the total bw the same as the stated limit.  It monitors and
shows network bandwidth, but can't change the bw allocation mid-job.
It can only suspend rsyncs until the load decreases below the cutoff.
If you suspend parsync (^Z), all rsync children will suspend as well,
regardless of current state.

Unless changed by '--interface', it assumes and monitors eth0.  The
transfer will use whatever interface normal routing provides, normally
set by the name of the target.  It can also be used for non-host-based
transfers (between mounted filesystems) but the network bandwidth continues
to be (pointlessly) shown.

[[NB: Between mounted filesystems, parsync sometimes works very poorly for
reasons still mysterious.  In such cases (monitor with 'ifstat'), use 'cp'
for the initial data movement and a single rsync to finalize.  I believe
the multiple rsync chatter is interfering with the transfer.]]

It only works on dirs and files that originate from the current dir (or
specified via "--startdir").  You cannot include dirs and files from
discontinuous or higher-level dirs.

** the [$cache_dir] files **
The [$cache_dir] dir contains the cache dir (fpcache), and the
time-stamped log files. The cache files can be re-used with '--reusecache'
(which will re-use ALL the chunk files.  The log files are datestamped and
are not NOT overwritten.

** Odd characters in names **
parsyncfp will refuse to transfer some oddly named files (tho it should copy
filenames with spaces fine.  Filenames with embedded newlines, DOS EOLs,
and some other odd chars will be recorded in the log files in the 
[$cache_dir] dir.

OPTIONS
=======
[i] = integer number
[f] = floating point number
[s] = "quoted string"
( ) = the default if any

--NP [i] (sqrt(#CPUs)) ................  number of rsync processes to start
       optimal NP depends on many vars.  Try the default and incr as needed
--startdir [s] (`pwd`)  ....................  the directory it starts at(*)
--maxbw [i] (unlimited) ..........  in KB/s max bandwidth to use (--bwlimit
       passthru to rsync).  maxbw is the total BW to be used, NOT per rsync.
--maxload [f] (NP+2)  .............  max system load - if sysload > maxload,
                                                sleeps an rsync proc for 10s
--chunksize [s] (10G) ... aggregate size of the files allocated to one rsync
                      process.  Can specify in 'human' terms [100M, 50K, 1T]
                      as well as integer bytes.
--rsyncopts [s]  ....  options passed to rsync as a quoted string (CAREFUL!)
	 this opt triggers a pause before executing to verify the command(+)
--interface [s]  ........  network interface to monitor (not use; see above)
--reusecache  ..........  don't re-read the dirs; re-use the existing caches
--email [s]  .....................  email address to send completion message
--nowait  .............  for scripting, sleep for a few s instead of pausing
--version  .................................  dumps version string and exits
--help  .........................................................  this help

(*) you can use globs/regexes with --startdir, but only if you're at that
point in the dir tree. ie: if you're not in the dir where the globs can be
expanded, then the glob will fail.  However, explicit dirs can be set from
anywhere if given an existing startdir.

(+) the '--rsyncopts' string can pass any rsync option to all the rsyncs that
wil be started.  This allows options like '--exclude-from' to filter out
unwanted files.

Examples
========
(Good example)
% parsync  --maxload=5.5 --NP=4 --startdir='/home/hjm' dir[123]  \
hjm\@remotehost:~/backups

where
  = "--startdir='/home/hjm'" sets the working dir of this operation to
      '/home/hjm' and dir1 dir2 dir3 are subdirs from '/home/hjm'
  = the target "hjm\@remotehost:~/backups" is the same target rsync would use
  = "--NP=4" forks 4 instances of rsync
  = -"-maxload=5.5" will start suspending rsync instances when the 1m system
      load gets to 5.5 and then unsuspending them when it goes below it.

  It uses 4 instances to rsync dir1 dir2 dir3 to hjm\@remotehost:~/backups


(Good example)
% parsync --reusecache  --NP=3 --barefiles  *.txt   /mount/backups/txt

where
  = "--reusecache" indicates that the filecache shouldn't be re-generated,
    uses the previous filecache in ~/.parsync
  = "--NP=3" for 3 copies of rsync (with no "--maxload", the default is 4)
  = "--barefiles" indicates that it's OK to transfer barefiles instead of
    recursing thru dirs.
  = "/mount/backups/txt" is the target - a local disk mount instead of a network host.

  It uses 3 instances to rsync *.txt from the current dir to "/mount/backups/txt".


(Good example)
parsyncfp   --checkperiod 6  --NP 3 --interface eth0  --chunksize=87682352 \
   --rsyncopts="--exclude='[abc]*'"  nacs/fabio   hjm\@moo:~/backups

The above command shows several options used correctly:

--chunksize=87682352 - shows that the chunksize option can be used with explicit
integers as well as the human specifiers (TGMK).

--rsyncopts="--exclude='[abc]*'" - shows the correct form for excluding files
based on regexes (note the quoting)

nacs/fabio - shows that you can specify subdirs as well as top-level dirs (as
long as the shell is positioned in the dir above, or has been specified via
'--startdir'


(Error Example)
% pwd
/home/hjm  # executing parsync from here

% parsync --NP4  /usr/local  /media/backupdisk

why this is an error:
  = '--NP4' is not an option (parsync will say "Unknown option: np4"
    It should be '--NP=4' or '--NP 4'
  = if you were trying to rsync '/usr/local' to '/media/backupdisk', it will
    fail since there is no /home/hjm/usr/local dir to use as a source.
    This will be shown in the log files in ~/.parsync/rsync-logfile-<datestamp>_#
    as a spew of "No such file or directory (2)" errors

The correct version of the above command is:

% parsync --NP=4  --startdir=/usr  local  /media/backupdisk

HELP

  print {$hlp} $helptxt;
  close $hlp;
  # List-form system(): the file name is passed to less as-is, with no shell
  # in between.  If less can't be run, fall back to printing the text.
  if (system("less", "-S", $helpfile) != 0) {print $helptxt;}
  unlink $helpfile;
  die "Did that help?.\n";
}