#!/usr/bin/env perl
use strict;
use Getopt::Long;    # for std option handling: -h --yadda=badda, etc
use Socket;
use Env qw(HOME PATH);
use File::Path qw(remove_tree make_path);
use Term::ANSIColor; # for alarms

# after significant changes, update the tarball and cp to moo for distribution; update the github
# fn="/home/hjm/bin/parsyncfp"; cd ; cp $fn ~/parsyncfp/; tar -cvzf parsyncfp+utils.tar.gz parsyncfp; scp parsyncfp+utils.tar.gz moo:~/public_html/parsync ;
# copy to all the local hosts
# scp $fn moo:~/public_html; scp $fn moo:~/bin; scp $fn dabrick:~/bin; ssh -t moo 'scp bin/parsyncfp hmangala@hpcs:/data/hpc/bin';
# don't forget!! using git! Add changes to changelog in the README.md file
# cd ~/gits/parsyncfp; cp ~/bin/parsyncfp .; git add parsyncfp README.md; git commit -m 'commit message'; git push
# check github for bug reports.

# TODO
# - integrate pmj into pfp? use something like [xterm -e "cd /path/to/pmj/dir; pmj shell start file; wait"
#   - --pmj=/path/to/pmj dir
#   - starts an xterm and sends output there, opens the gnuplot window
# - [x] changed the calc for determining TCP network bandwidth to reference /proc/net/dev which should
#   be more reliable across distros and maybe even OSs. However, this won't detect RDMA data. For that,
#   need perfquery.
# - [x] added RDMA support (if the interface =~ ib, then it will try to use perfquery to measure the RDMA
#   bandwidth)
# - [x] check the sequencing for the use of the alt-cache option to make sure that things are being
#   deleted or not in the right sequence.
# - [x] write funcs to color different outputs different colors based on what they are -
#   blue for INFO, orange for WARNINGs, red for ERRORs.
# - check that fpart can generate at least the # of chunks that are > NP (as below)
# - port to MacOSX using hackintosh
# - [x] done: fix bandwidth calculation subroutine.
# - [x] done: check for fpart before running.
# - [x] done: test for '-d' or '--delete' in the rsyncopts line and refer to problem with this.
# - test for # of chunk files generated. emit warnings if it goes above 2000 (advise to choose
#   a larger chunksize); or if less than NP. Don't assume a large # or even the same # as the NP #.
# - [x] done: decouple the cycle time from the job start time. ie, keep monitoring the exit codes
#   and launch the next rsync immediately, don't wait for the checkperiod cycle, since that could be
#   quite long
# - handle immediate, top-level dirs

use vars qw(
$allPIDs $ALL_SYS_RSYNC_PIDS $ch $CHECKPERIOD $cmd $crr $CUR_FP_FLE $CUR_FPI $DATE
$dcnt $DEBUG @DIRS @DIRS2SYNC $dirtmp $EMAIL $Filecnt %FILES $fl $fn $fnd2r $FOUT
$FPART_LOGFILE $FPART_PID $FPART_RUNNING $FPARTSIZE $FPARTSIZE_N $FP_PIDFILE $FP_ROOT
$cyclecnt $FP_ROOT_DIR $FP_RUNNING $hdr_cnt $hdr_rpt $HELP $IF_SPEED $VERBOSE
$LOAD1mratio $loadavg $logfile $MAXBW $MAXLOAD $nbr_cur_fpc_fles $NBR_FP_FLES $NCPUs
$NDIRS $NETIF $NOWAIT $NP $NP_chunk $glob $ALTCACHE $parsync_dir $PARSYNCVER $PIDFILE
$prev_cache $lenPID $DISPOSE $rem_host $remote $rem_path $rem_user $rootdir $rPIDs
$sPIDs $ROOTDIR $RSYNC_CMD $RSYNCOPTS $RSYNCS_GOING $STILLRSYNCS $DFLT_RSYNCOPTS
@SYSLOAD $TARGET $tmp $Totlsiz %UTILS $VERSION $OS $Linux $MacOSX $NETFILE $myIP
$PERFQUERY $avgTCPrecv $avgTCPsend $avgRDMArecv $avgRDMAsend
);

$PARSYNCVER = << "VERSION";
parsyncfp version 1.56
Dec 12th, 2018
by Harry Mangalam

parsyncfp is a Perl script that wraps Andrew Tridgell's miraculous 'rsync' to
provide some load balancing and parallel operation across network connections
to increase the amount of bandwidth it can use.
The 'fp' variant uses 'fpart' to bypass the need for a full recursive descent of the dir trees before the actual transfer starts. Do NOT try to use rsync --delete options'. More help with '--help' VERSION if (! @ARGV) {usage();} # in case someone doesn't know what to do. &GetOptions( "startdir|sd=s" => \$ROOTDIR, # Have to be able to set rootdir -> SRC in rsync "altcache|ac=s" => \$ALTCACHE, # alternative cache instead of ~/.parsyncfp "rsyncopts|ro=s" => \$RSYNCOPTS, # passthru to rsync as a string "NP|np=i" => \$NP, # number of rsync processes to start "chunksize|cs=s" => \$FPARTSIZE, # the size that fpart chunks (allow PpTtGgMmKk) "checkperiod|cp=i" => \$CHECKPERIOD, # # of sec between system load checks "maxbw=i" => \$MAXBW, # max bw to use (--bwlimit=KBPS passthru to rsync) "maxload|ml=f" => \$MAXLOAD, # max system load - if > this, sleep rsyncs "email=s" => \$EMAIL, # email to notify when finished "interface|i=s" => \$NETIF, # network interface to use if multiple ones "verbose|v=i" => \$VERBOSE, # how chatty it should be. "nowait|nw!" => \$NOWAIT, # sleep a few s rather than wait for a user ack "help!" => \$HELP, # dump usage, tips "version!" => \$VERSION, # duh.. "dispose|d=s" => \$DISPOSE, # what to do with the cache (compress, delete, leave untouched) "debug|d!" => \$DEBUG, # developer-level info; (historical) alias for '-v 3' ); ## Set up run-permanent variables. $DATE=`date +"%T_%F" | sed 's/:/./g' `; chomp $DATE; if (! defined $ALTCACHE) {$parsync_dir = $HOME . "/.parsyncfp";} else {$parsync_dir = $ALTCACHE; } #mkdir $parsync_dir; !!! $NETFILE = "/proc/net/dev"; $OS = `uname -s`; chomp $OS; $Linux = $MacOSX = 0; if ($OS =~ /Linux/) {$Linux = 1;} else {$MacOSX = 1;} $DFLT_RSYNCOPTS = "-a -s"; # the default options to pass to rsync; blanked if define $RSYNCOPTS if (defined $VERSION) { print colored(['green'], $PARSYNCVER, "\n"); exit;} if (!defined $CHECKPERIOD) {$CHECKPERIOD = 3;} if (!defined $VERBOSE) {$VERBOSE = 2;} $PERFQUERY = 0; my $fpcheck = `which fpart`; if ($fpcheck eq "") {FATAL("There's no 'fpart' executable on your PATH. Did you install it? See: https://github.com/martymac/fpart/blob/master/README");} if (!defined $RSYNCOPTS) {$RSYNCOPTS = ""; $DFLT_RSYNCOPTS = "-a -s";} else { # if def $RSYNCOPTS, then user takes all responsibility $DFLT_RSYNCOPTS = ""; if ($RSYNCOPTS =~ / -d / || $RSYNCOPTS =~ / --del/){ # user tries to pass in a 'delete' option WARN("It looks like you're trying to pass in a '--delete' option in the '--rsyncopts' string. [$RSYNCOPTS] Because parallel rsyncs don't know what the other rsyncs are doing, 'delete' options don't work well. If this is what you want to do, omit that option here and follow the parsyncfp command with a regular 'rsync --delete' command. It will be slower than a parallel operation but since most of the action will be remote deletes, it should be fairly fast. If the operation is to be performed on locally mounted filesystems (not to remote nodes), I'd strongly recommend the 'fpsync' tool, which you should have already received as part of the 'fpart' package necessary to run parsyncfp. 'fpsync' DOES provide support for a parallel '--delete', and the author provides a good explanation as to how he does this here: . HOWEVER!! Anytime you use '--delete' in an rsync operation, MAKE SURE you know what you're doing. 
"); exit(0); } } #if (defined $HELP || @ARGV == 0) { usage(); } if (defined $HELP) {usage($parsync_dir);} if (!defined $DISPOSE) {$DISPOSE = 'l';} # for leave untouched # check_utils(); # check that the required utilities are on the system ### get the current system stats: #CPUs, load, bandwidth, etc if ($Linux) { $NCPUs = `cat /proc/cpuinfo | grep processor | wc -l`; chomp $NCPUs; $loadavg = `cat /proc/loadavg | tr -d '\n'`; my $pid_max = `cat /proc/sys/kernel/pid_max`; $lenPID = length $pid_max; # usually 5 but can go as high as 7 } elsif ($MacOSX) { $NCPUs = `sysctl -n hw.ncpu`; chomp $NCPUs; $loadavg = `sysctl -n vm.loadavg | cut -d" " -f2 -f3 -f4 | tr -d '\n'`; $lenPID = 5; # highest possible pid is 99998. } else { FATAL("parsyncfp only supports Linux and MacOSX at this point\n"); } @SYSLOAD = split (/\s+/, $loadavg); # 1st 3 fields are 1, 5, 15m loads # so as long as the 1m load / NCPUs < 1, we're fine; if > 1, we may want to start throttling.. $LOAD1mratio = $SYSLOAD[0] / $NCPUs; if (! defined $NETIF) { if ($MacOSX) { $NETIF = `netstat -nr | grep "^default" | head -n1 | awk '{print \$6}'`; chomp $NETIF; $myIP = `ifconfig $NETIF | grep 'inet ' | awk '{print \$2}'`; chomp $myIP; } else { #TODO This has to be checked for multi-homed systems and if the system is multihomed, # force a choice as to which one to use via --interface my $ifs = `/sbin/route -n | grep "^0.0.0.0" | awk '{print \$8}' | wc -l`; chomp $ifs; $NETIF = `/sbin/route -n | grep "^0.0.0.0" | awk '{print \$8}'`; chomp $NETIF; if ($ifs != '1'){ die "\nERROR: Your system is multi-homed - I've detected more than 1 active interface: $NETIF \nPlease specify the one you want to use via the '--interface' flag.\n"; } else { $NETIF = `/sbin/route -n | grep "^0.0.0.0" | awk '{print \$8}'`; chomp $NETIF; $myIP = `ifconfig $NETIF | grep 'inet ' | awk '{print \$2}' | cut -d: -f2`; chomp $myIP; } } } my $pqpath = `which perfquery`; if ($NETIF =~ /ib/){ INFO("You've specified what looks like an Infiniband interface [$NETIF]...\n"); if ($pqpath ne "") { $PERFQUERY = 1; INFO(".. and you have 'perfquery installed, so RDMA bytes will be reported as well.\n"); } else { $PERFQUERY = 0; INFO(".. but you don't have 'perfquery' installed, so only TCP bytes will be reported.\n"); } } if (defined $DEBUG) {$VERBOSE = 3;} # DEBUG = VERBOSE=3 if (defined $VERBOSE && ($VERBOSE < 0 || $VERBOSE > 3)) {die "ERROR: --verbose arg must be 0-3. Try again.\n";} if (! defined $NP){$NP = int(sqrt($NCPUs)+ 0.5);} # round sqrt(NCPUs) (hyperthreaded if Intel) 8 -> 3 if (! defined $MAXBW) {$MAXBW = 1000000;} # essentially unlimited else {$MAXBW = int($MAXBW / $NP + 0.5);} # users expect total maxbw; so have to divide by NP. if (! defined $MAXLOAD) {$MAXLOAD = $NP + 2 ;} # + 1 for IO load if (! defined $ROOTDIR) {$ROOTDIR = `pwd`; chomp $ROOTDIR;} # where all dirs must be rooted. if (! defined $FPARTSIZE) {$FPARTSIZE = "10G"; $FPARTSIZE_N = 104857600;} # default is 10Gish elsif ($FPARTSIZE =~ /[PpTtGgMmKk]/) {$FPARTSIZE_N = ptgmk($FPARTSIZE); } else {$FPARTSIZE_N = $FPARTSIZE;} if ($DEBUG) {&debug(__LINE__, "FPARTSIZE = $FPARTSIZE\nFPARTSIZE_N = $FPARTSIZE_N");} # fix .ssh/config file to eliminate wonky errors. fix_ssh_config(); # ?? Is this nec anymore? 
If so, need to bring it up to date with the new naming conventions # see: https://goo.gl/kDLr8b # get some network info if ($NETIF =~ /eth/) { $IF_SPEED = `ethtool eth0 2> /dev/null | grep Speed | cut -f2 -d:`;} elsif ($NETIF =~ /wlan/) { $IF_SPEED = `iwconfig wlan0 | grep -i quality`; } elsif ($NETIF =~ /ib/) { $IF_SPEED = `ibstat | grep Rate | head -1 | sed -e 's/^[ \t]*//'`; $IF_SPEED = "IB:" . $IF_SPEED; } chomp $IF_SPEED; if ($DEBUG){ print "\tDEBUG: Using network interface [$NETIF] with connection quality [$IF_SPEED]\n\n";} if ($SYSLOAD[0] < $MAXLOAD){ if ($DEBUG){ print "\n\tDEBUG: 1m load is [$SYSLOAD[0]] and the 1m Load:#CPU ratio is [$LOAD1mratio] ( [$NCPUs] CPU cores). OK to continue.\n " } } else { WARN("1m System load is > [$SYSLOAD[0]]. The 1m Load:#CPU ratio is [$LOAD1mratio]. Continue? [Cntrl+C to interrupt; Enter to continue]"); pause(); } $FP_ROOT_DIR = "${parsync_dir}/fpcache"; if (-d $parsync_dir) { if ($VERBOSE >= 1) {WARN("About to remove all the old cached chunkfiles from [$FP_ROOT_DIR]. Enter ^C to stop this. If you specified '--nowait', cache will be cleared in 3s regardless. Otherwise, hit [Enter] and I'll clear them."); } $glob = "${FP_ROOT_DIR}/f*"; if ($NOWAIT){ sleep 3;} elsif ($VERBOSE > 0) {pause();} system("rm -f $glob"); if ($VERBOSE >=2 ) { INFO("The fpart chunk files [$glob] are cleared .. continuing.\n") } } elsif (! -d $parsync_dir) { make_path $parsync_dir or FATAL("Can't create [ $parsync_dir ]"); } if (! -d $FP_ROOT_DIR) {mkdir $FP_ROOT_DIR or FATAL("Can't make 'FP_ROOT_DIR' [$FP_ROOT_DIR]");} $FP_ROOT = $FP_ROOT_DIR . "/f"; # the root name of the fpart chunk files f.1, etc $PIDFILE = $FP_ROOT_DIR . '/' . "rsync-PIDs" . '-' . $DATE; $FPART_LOGFILE = $FP_ROOT_DIR . '/' . "fpart.log." . $DATE; $FP_PIDFILE = $FP_ROOT_DIR . '/' . "FP_PIDFILE" . $DATE; $hdr_rpt = 20; # nbr of lines to repeat the header $hdr_cnt = 21; # header counter; > $hdr_rpt so it gets printed 1st time # this takes care of the last ARGV so that all the rest of the words are target dirs&files $TARGET = $ARGV[$#ARGV]; # remote rsync target if (!defined $TARGET ){ FATAL("No target defined! Where you gonna put this stuff??!?\nTry $0 --help for the built-in help."); } $#ARGV--; if ($TARGET =~ /~/) { FATAL("You defined the target dir with a '~': [$TARGET]. While this SHOULD work, it often doesn't so I'm going to force you to replace it with an explicit remote path. ie instead of using '~/dir', please use '/home//dir. Sorry."); } # now process the dirs $dcnt = 0; $fnd2r = ""; # zero the list of 'files 'n' dirs to rsync' $dirtmp = shift; # should only be dir/files left once getopt finishes (see above) # If there are no files or dirs defined, take the current dir if (!defined $dirtmp) { $dirtmp = `pwd`;} while (defined $dirtmp) { # should work on explicitly named dirs as well as globs. $dirtmp = $ROOTDIR . '/' . $dirtmp; if (! -r $dirtmp){ # quick check to see if its readable. WARN("[$dirtmp] isn't readable; either it's not where you think it is or you need to escalate your privs. Regardless, it won't be transferred in this run."); if ($NOWAIT){ sleep 3;} elsif ($VERBOSE > 0) {pause();} } else { # otherwise, add the file to list to be chunked and transferred. $fnd2r .= $dirtmp . " "; } $dirtmp = shift; } $#ARGV++; # now incr to allow the TARGET to be captured. my @cachefiles = (); # will populate with list of cachefiles to process together. 
my $fparts_already_running = `ps aux | grep 'fpar[t]'`; chomp $fparts_already_running; if ($fparts_already_running ne ''){ WARN("One or more 'fpart's are already running: ====== [$fparts_already_running] ====== Unless you know that these fparts are valid (ie you're running another parsyncfp in another shell on this machine) and not left over from previous parsyncfp's, you should ^C and kill them off before restarting this run. Pausing for 5s to allow you to read this and take action (or not). If you do nothing, I'll continue. "); sleep 5; } my $x = 0; $fnd2r =~ s/^\s+|\s+$//g ; # trim leading and trailing # Up to the user to escape internal spaces in the names of target dirs. # keeping the following line here as a reminder to think about how to better # address this problem. #$fnd2r =~ s/ /\\ /g; # subs internal spaces with escaped spaces my $cmd = "fpart -v -L -z -s $FPARTSIZE_N -o $FP_ROOT $fnd2r 2> $FPART_LOGFILE & echo \"\${!}\" > $FP_PIDFILE"; # captures the child PID! if ($DEBUG) {print "DEBUG: fpart fork cmd:\n[$cmd]\n";} sleep 5; if ($FPART_PID = fork) { # this actually takes a couple of seconds if ($VERBOSE >= 2) { INFO("Forking fpart. Check [$FPART_LOGFILE] for errors if it hangs.\n"); } } else { system "$cmd"; $FPART_PID = `cat $FP_PIDFILE`; chomp $FPART_PID; exit(0); # it's forked, now exit this stanza } # fpart has been forked; wait for enough chunkfiles to be written to start the rsyncs while (! -e $FP_PIDFILE) { sleep 1; if ($VERBOSE >= 3) {INFO("Waiting for fpart to be forked..\n");} } $FPART_PID = `cat $FP_PIDFILE`; chomp $FPART_PID; my $ready2start = my $waitcnt = $NBR_FP_FLES = 0; my $fp0 = $FP_ROOT . ".0"; my $fp1 = $FP_ROOT . ".1"; my $done = 0; while ($ready2start == 0) { if (-e $fp0) { if ($VERBOSE >= 3) {INFO("[$fp0] visible.\n");} $NBR_FP_FLES++; $ready2start=1; } $waitcnt++; if ($VERBOSE >= 3) {INFO("Waiting [$waitcnt]s for chunk files to be written\r");} sleep 1; } # start up NP rsyncs 1st, then cycle every CHECKPERIOD, checking # of rsyncs still going and # starting new ones as needed until the chunkfiles are exhausted. my $STILL_FP_CHUNKS = my $KEEPGOING = 1; my $FPCFS = "${FP_ROOT}."; # FP Chunk File Stem my $NBR_FP_FLES = `\\ls -U1 ${FPCFS}* | wc -l`; chomp $NBR_FP_FLES; $RSYNCS_GOING = $CUR_FPI = 0; # $CUR_FPI = current FP index if ($VERBOSE >= 2) {INFO("Starting the 1st [$NP] rsyncs ..\n");} my $sc = 0; while ($RSYNCS_GOING < $NP && $KEEPGOING) { # $CUR_FP_FLE = $FP_ROOT . "." . $CUR_FPI ; # the current fp chunkfile if (-e $CUR_FP_FLE) { # if the current chunkfile exists fixfilenames($CUR_FP_FLE, $ROOTDIR); # check & fix for spaces, bad chars. # entire rsync command and PID capture (used in total of 2 places) $logfile = $parsync_dir . '/' ."rsync-logfile-" . $DATE . "_" . $CUR_FPI; $RSYNC_CMD = "rsync --bwlimit=$MAXBW $RSYNCOPTS -a -s --log-file=$logfile --files-from=$CUR_FP_FLE $ROOTDIR $TARGET & echo \"\${!}\" >> $PIDFILE"; # there will be as many logfiles as fp chunkfiles. # ie LOTS. but they can be deleted after the run has been verified.. # TODO don't know if we need this logfile. if ($DEBUG) {&debug(__LINE__, "Complete rsync cmd = [$RSYNC_CMD]");} system("$RSYNC_CMD"); # launch rsync and capture the bg job PID to PIDfile $CUR_FPI++; $RSYNCS_GOING++; } else { # there aren't any more fp chunk files waiting, so check to see if it's finished. $FPART_RUNNING = `ps aux | grep fpar[t] | grep $FPART_PID | wc -l`; chomp $FPART_RUNNING; if ($FPART_RUNNING eq '0') { # so if it's done, then we're done. No more chunk files, so no more rsyncs to start. 
$KEEPGOING = 0; # signal the while loop to break.
      } else { # fpart is still going so wait for the next fpart chunkfile to be finished.
        if ($VERBOSE >= 2) {INFO("waiting [$sc]s for next chunkfile [$CUR_FP_FLE]\r");}
        sleep 2; $sc += 2;
      }
   }
} #while ($RSYNCS_GOING < $NP && $KEEPGOING)

# so at this point either we've loaded all the rsyncs up to NP or we've completely finished.
# If the latter, say good bye. If the former, then we have to keep launching
# rsyncs up to NP until we've used up all the fpart chunkfiles.

$sPIDs = ""; # running PIDs launched by parsync, suspended PIDs (strings)
$NBR_FP_FLES = `\\ls -U1 $FPCFS* | wc -l`; chomp $NBR_FP_FLES; # get current # of chunks
my @aprPIDs; # all recorded parsyncfp rsync PIDs ever started
my @crrPIDs; # currently RUNNING parsyncfp rsync PIDs.
my @csrPIDs; # currently SUSPENDED parsyncfp rsync PIDs.

### FOLLOWING IS THE MAIN PARSYNC-FPART LOOP
$FP_RUNNING = `ps aux | grep $FPART_PID | grep fpar[t] | wc -l`; chomp $FP_RUNNING;
$cyclecnt = 0;
my $IFN = sprintf("%7s",$NETIF);
my $day =`date +"%F"`; chomp $day;
                     # |    TCP / RDMA out    |
if ($VERBOSE == 1) { # ..............|---------- / ---------|
  print " | Elapsed | 1m | [$IFN] MB/s | Running || Susp'd | Chunks [$day]\n   Time   | time(m) | Load | TCP / RDMA out | PIDs || PIDs | [UpTo] of [ToDo]\n";
}
my $start_secs = `date +"%s"`;

while ($CUR_FPI < $NBR_FP_FLES || $FP_RUNNING || $STILLRSYNCS ) {
  $rPIDs = "";
  # print the header
  if ($hdr_cnt > $hdr_rpt) {
    my $glob = "${FP_ROOT}.*";
    $hdr_cnt = 0;
    $nbr_cur_fpc_fles = `\\ls -U1 $glob | wc -l`; chomp $nbr_cur_fpc_fles;
    $day =`date +"%F"`; chomp $day;
    if ($VERBOSE > 1) {print " | Elapsed | 1m | [$IFN] MB/s | Running || Susp'd | Chunks [$day]\n   Time   | time(m) | Load | TCP / RDMA out | PIDs || PIDs | [UpTo] of [ToDo]\n";}
  }
  ($rPIDs, $crr) = get_rPIDs($PIDFILE, $sPIDs);

  # now get load, bw, etc, and start rsyncs on new chunkfiles or suspend them to
  # load-balance
  $loadavg = `cat /proc/loadavg | tr -d '\n'`; # What's the system load?
  @SYSLOAD = split (/\s+/, $loadavg); # 1st 3 fields are 1, 5, 15m loads
  $LOAD1mratio = $SYSLOAD[0] / $NCPUs;

  # print out current data with the date
  $rPIDs =~ s/^\s+|\s+$//g ; $sPIDs =~ s/^\s+|\s+$//g ; # trim leading & trailing whitespace
  my $NrPIDs = my @Lr = split(/\s+/, $rPIDs);
  my $NsPIDs = my @Ls = split(/\s+/, $sPIDs);
  my $glob = "${FP_ROOT}.*";
  $nbr_cur_fpc_fles = `\\ls -U1 $glob | wc -l`; chomp $nbr_cur_fpc_fles;

  # if fpart is done ($FPART_RUNNING = "")
  #   $FPART_RUNNING = `ps aux | grep fpar[t] | grep $FPART_PID | wc -l`; chomp $FPART_RUNNING;
  # AND $CUR_FPI >= $nbr_cur_fpc_fles
  # AND there aren't any $rPIDs AND there aren't any $sPIDs
  # then I think we're done.

  # check fpart to see if it's still running..
  $FPART_RUNNING = `ps aux | grep fpar[t] | grep $FPART_PID | wc -l`; chomp $FPART_RUNNING;
  if ($rPIDs eq "" ){$rPIDs = "No running PIDs; will start more next cycle"}
  my $rDATE=`date +"%T" | sed 's/:/./g' `; chomp $rDATE;

  # check cycles; print if exceeded, then reset counter.
if ($cyclecnt++ > ($CHECKPERIOD - 4)) { my $avgTCPsend; if ($Linux) { ($avgTCPrecv, $avgTCPsend, $avgRDMArecv, $avgRDMAsend) = getavgnetbw($NETIF, $CHECKPERIOD, $PERFQUERY); chomp $avgTCPsend; $avgTCPsend = ($avgTCPsend / 1048576); # convert to MB chomp $avgRDMAsend; $avgRDMAsend = ($avgRDMAsend / 262144); # convert to MB; use same divisor as rdma-tct-stats } else { my $RDMA_T1 = my $RDMA_T2 = 0; # if ($DEBUG) {print "DEBUG: netstat lines next with myIP=[$myIP]\n";} my $o1_bytes = `netstat -bi | grep $myIP | awk '{print \$10}'`; sleep $CHECKPERIOD; my $o2_bytes = `netstat -bi | grep $myIP | awk '{print \$10}'`; $avgTCPsend = ($o2_bytes - $o1_bytes) / $CHECKPERIOD / 1048576; # (1024^2) } my $cur_secs = `date +"%s"`; my $el_min = ($cur_secs - $start_secs) / 60; # this should switch from scrolling to overwrite when VERBOSE < 2 # print out the line if ($VERBOSE > 0) { printf "%8s %5.2f %5.2f %9.2f / %-9.2f %2d <> %2d [%d] of [%d]", $rDATE, $el_min, $SYSLOAD[0], $avgTCPsend ,$avgRDMAsend, $NrPIDs, $NsPIDs, $CUR_FPI, $nbr_cur_fpc_fles; } # and then over-write it or add a newline for scrolling data. if ($VERBOSE == 1) { printf "\r";} elsif ($VERBOSE >= 2) {printf "\n";} if ($DEBUG) {print "\nDEBUG: rPIDs = $rPIDs; sPIDs = $sPIDs\n";} $cyclecnt = 0; $hdr_cnt++; } ### SUSPEND OR CONTINUE RSYNCS for LOADBALANCING if ($SYSLOAD[0] > $MAXLOAD) { # suspend a PID; then loop as normal. If still high, will continue to # suspend PIDs until there's none left. if ($DEBUG) {print "\nDEBUG: System load [$SYSLOAD[0]] is > MAXLOAD [$MAXLOAD]. Will try to suspend a running rsync to shed load.\n";} # reassign a new list from ONLY RUNNING PIDs to $rPIDs (refresh $rPIDs) # this cmd picks up both suspended and running PIDs- have to remove the suspended ones. # in an efficient way. if ($rPIDs =~ /\d+/) {$rPIDs = `ps -p $rPIDs | grep -v PID| cut -c 1-5 | tr '\n' ' '`;} $rPIDs =~ s/^\s+|\s+$//g ; # trim leading and trailing # turn it into an array - (-> sub?) my $rn = my @ra = split(/\s+/, $rPIDs); my $sn = my @sa = split(/\s+/, $sPIDs); for (my $r=0; $r< $rn; $r++) { for (my $s=0; $s< $sn; $s++) { if ($ra[$r] eq $sa[$s]) {$rPIDs =~ s/$ra[$r]//g;} # delete it from $rPIDs } } # picks up both suspended and running PIDs and the new result has to have something in it as well. if ($rPIDs =~ /\d+/){ # if any still left my $N = my @raPIDs = split(/\s+/, $rPIDs); my $e = 0; # @raPIDs = temp array to carry currently running PIDs while ($e <= $N && $raPIDs[$e] !~ /\d+/){$e++}; if ($DEBUG) {print "\t\tDEBUG:got one: [$raPIDs[$e]]; will now suspend it\n";} kill 'STOP', $raPIDs[$e]; if ($sPIDs !~ /$raPIDs[$e]/) { # If it's not there already $sPIDs = "$sPIDs" . ' ' . "$raPIDs[$e]"; # transfer rPID to sPID. $rPIDs =~ s/$raPIDs[$e]//g; # only then delete that PID fr the rPID string } } else { # there aren't any more PIDs left - all done or killed off.' if ($VERBOSE >= 2) {WARN("No more running rsync PIDs left [$rPIDs]. All rsyncs are suspended [$sPIDs].");} } } elsif ($sPIDs =~ /\d+/) { # if there are sPIDs, unsuspend them one by one # split em my $N = my @saPIDs = split(/\s+/, $sPIDs); my $e = 0; while ($e <= $N && $saPIDs[$e] !~ /\d+/){$e++}; if ($DEBUG) { print "\t\tDEBUG:got one: [$saPIDs[$e]]; will now UNsuspend it\n";} kill 'CONT', $saPIDs[$e]; $rPIDs = "$rPIDs" . ' ' . "$saPIDs[$e]"; # transfer sPID to rPID. $sPIDs =~ s/$saPIDs[$e]//g; # delete that PID fr the sPID string } # end of 'SUSPEND OR CONTINUE to LOADBALANCE.' test loop # and if neither of those conditions are met, then we can launch another rsync. 
elsif ($crr < $NP) { # then launch another rsync with the next fpart chunkfile $CUR_FP_FLE = "${FP_ROOT}.${CUR_FPI}" ; # generate the next fpart chunk file with $CUR_FPI # if fpart is still going, wait for the next chunkfile to show up my $cfw = 0; $FPART_RUNNING = `ps aux | grep fpar[t] | grep $FPART_PID | wc -l`; chomp $FPART_RUNNING; while (! -e $CUR_FP_FLE && $FPART_RUNNING eq '1'){ if ($VERBOSE >= 2) {INFO("Waiting [$cfw]s for next chunkfile..\r"); sleep 2; $cfw += 2;} } ($rPIDs, $crr) = get_rPIDs($PIDFILE, $sPIDs); my $n = my @a = split(/\s+/, $rPIDs); my $R2SU = $NP - $n; # this is the number of rsyncs to start up $glob = "${FP_ROOT}.*"; my $nbr_cur_fpc_fles = `\\ls -U1 $glob | wc -l`; chomp $nbr_cur_fpc_fles; # $fparts_already_running will be '' if it's finished running. my $fparts_already_running = `ps aux | grep 'fpar[t]'`; chomp $fparts_already_running; # Check this more carefully for exceptions - this is the drop-dead error point # in some situations for ($n=0; $n<$R2SU; $n++) { # make sure we haven't finished $FPART_RUNNING = `ps aux | grep fpar[t] | grep $FPART_PID | wc -l`; chomp $FPART_RUNNING; if ($rPIDs eq "" && $sPIDs eq "" && $CUR_FPI >= $nbr_cur_fpc_fles && $FPART_RUNNING == 0){ # then we're done - exit. if ($VERBOSE >= 2) {INFO("Done. Please check the target to make sure expected files are where they're supposed to be. \n");} # remind user how much storage the cache takes and to clear the cache files my $du_cache = `du -sh $parsync_dir`; chomp $du_cache; if ($VERBOSE >= 2) {INFO(" The parsyncfp cache dir takes up [$du_cache] Don't forget to delete it, but wait until you are sure that your job completed correctly, so you don't need the log files anymore. Reminder: check the parsyncfp log [$logfile] and the fpart log [$FPART_LOGFILE] if there were errors. Use '--verbose=1' for less output. Thanks for using parsyncfp. Tell me how to make it better. \n"); } exit; } while (($CUR_FPI >= $nbr_cur_fpc_fles) && $fparts_already_running ne '') { if ($DEBUG) {print "DEBUG: CUR_FPI=$CUR_FPI >= nbr_cur_fpc_fles=$nbr_cur_fpc_fles?\n";} if ($VERBOSE >= 2) {INFO("Waiting for fpart to get ahead of the transfer..\r"); } $nbr_cur_fpc_fles = `\\ls -U1 $glob | wc -l`; chomp $nbr_cur_fpc_fles; $fparts_already_running = `ps aux | grep 'fpar[t]'`; chomp $fparts_already_running; sleep 2; } $logfile = $parsync_dir . '/' ."rsync-logfile-" . $DATE . "_" . $CUR_FPI; $CUR_FP_FLE = "${FP_ROOT}.${CUR_FPI}" ; # generate the next fpart chunk file with $CUR_FPI $nbr_cur_fpc_fles = `\\ls -U1 $glob | wc -l`; chomp $nbr_cur_fpc_fles; $RSYNC_CMD = "rsync --bwlimit=$MAXBW -a -s --log-file=$logfile $RSYNCOPTS --files-from=$CUR_FP_FLE $ROOTDIR $TARGET & echo \"\${!}\" >> $PIDFILE"; if ($DEBUG) {print "\nDEBUG: Starting [$RSYNC_CMD]\n"; } # check status in a 1s loop checking to start extra rsyncs do we don't wait any more than 1s # OR keep cycling continuously on a 1s loop and ONLY print out info every X cycles. This is the easiest way. if (-e $CUR_FP_FLE) { fixfilenames($CUR_FP_FLE, $ROOTDIR); # check & fix for spaces, bad chars. 
if ($VERBOSE >= 3) {my $tt = $CUR_FPI+1; INFO("next chunk [$tt] of [$nbr_cur_fpc_fles]\n");}
          system("$RSYNC_CMD"); # capture the bg job PID to PIDfile
          $CUR_FPI++;
        }
    }
    ($rPIDs, $crr) = get_rPIDs($PIDFILE, $sPIDs);
  }
  # sleep 1;
  $NBR_FP_FLES = `\\ls -U1 ${FPCFS}* | wc -l`; chomp $NBR_FP_FLES; # get current # of chunks
  if ($rPIDs =~ /\d+/) { $STILLRSYNCS = 1; } else {$STILLRSYNCS = 0;}
} # while ($CUR_FPI < $NBR_FP_FLES )

my $host = `hostname`; chomp $host;
if (defined $EMAIL){system("echo 'all rsyncs done' | mail -s 'parsyncfp on host [$host] completed' $EMAIL");}

# and based on --dispose, (c)ompress, (d)elete, or (l)eave untouched all the chunk files.
if ($DISPOSE =~ /d/) {
  if ($VERBOSE >= 2) {print ".. and finally disposing of the cache...";}
  system("\\rm -rf ${FP_ROOT_DIR}/f*");
} elsif ($DISPOSE =~ /c/ ) { # can it just be put into background?
  if ($VERBOSE >= 2) {print ".. tarring up your cachefiles...";}
  $cmd="tar --remove-files -czf ${parsync_dir}/fpcache_${DATE}.tar.gz ${FP_ROOT_DIR} &";
  #print "final tar cmd: [$cmd] \n";
  system ("$cmd");
} elsif ($VERBOSE >=2 ) {
  INFO("Your cache files have been left intact in [${FP_ROOT_DIR}].
  Please dispose of them as you see fit.
  Reminder: check [$FPART_LOGFILE] for errors if there were errors.
  Thanks for using parsyncfp. Tell me how to make it better.
  \n");
}
exit;

# ================= subroutines =================

# Define utilities required to run this version of parsync
sub check_utils {
  my %UTILS = ( # required utils to help this run correctly
    "ethtool"  => "",
    "iwconfig" => "",
    "fpart"    => "",
  );
  # and check that they can be found..
  my $utilsz = keys %UTILS;
  foreach my $util (keys %UTILS){
    my $utilpath = `which $util | tr -d '\n'`;
    if ($utilpath !~ /$util/){
      FATAL("[$util] not found. you can get 'fpart' here: https://github.com/martymac/fpart
      and the rest via yum, apt-get, or google.
      Please install it or correct your PATH variable to include it.");
    } else {
      $UTILS{$util} = $utilpath;
      if ($DEBUG){print "\tEVAL: Found [$util] at [$utilpath].\n"}
    }
  }
}

# usage: ($rPIDs, $crr) = get_rPIDs($PIDFILE, $sPIDs);
sub get_rPIDs($$) {
  # Inputs
  my $pidfile = shift; # string name of PIDFILE
  my $spids = shift;   # suspended PIDs in a string.
  my @aprPIDs = ();
  my $NSusPIDs = 0;
  my @SusPIDs;
  my $rpids = "";      # to be generated and returned as a string
  my @crrPIDs = ();    # array that holds the currently running rsync PIDs
  my @ASRP;            # All System Rsync PIDs
  my $NASRP;
  my $crr = 0;         # currently running rsyncs counter
  my $apr = 0;         # all parsyncfp rsync PIDs

  # how many rsyncs are running? Check the PIDFILE against the rsync PIDs that are running
  # if there are other rsyncs running, their PIDs won't be in the PIDFILE.
  # so have to do a diff of the PIDFILE vs all PIDs of rsyncs running.
  my $ALL_SYS_RSYNC_PIDS = `ps aux | grep rsyn[c] | awk '{print \$2}' | sort -g | tr '\n' ' '`;
  chop $ALL_SYS_RSYNC_PIDS;
  $NASRP = @ASRP = split(/\s+/, $ALL_SYS_RSYNC_PIDS);
  open (PIDFILE, "<$pidfile") or FATAL("Can't open PIDFILE [$pidfile]");
  # PIDs from the PIDFILE to compare system rsyncs (could be multiple going)
  # with parsync-launched rsyncs
  while (<PIDFILE>) {chomp; $aprPIDs[$apr++] = $_; } # all parsyncfp rsync PIDs
  close PIDFILE;

  # if there are any PIDs in the $spids string, split into an array
  if ($spids =~ /\d+/) { $NSusPIDs = @SusPIDs = split(/\s+/, $spids); }
  $rpids =~ s/^\s+|\s+$//g ; $spids =~ s/^\s+|\s+$//g ; # strip leading/trailing spaces

  # suboptimal I know, but the arrays are so small it doesn't matter.
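  # Worked example with made-up PIDs: if the system shows rsync PIDs (12001 12002 12003),
  # the PIDFILE records (12001 12002 12044), and 12002 is currently suspended, then
  #   @crrPIDs -> (12001 12002)  # intersection of running system rsyncs and recorded PIDs
  #   $rpids   -> "12001"        # after the suspended 12002 is masked out below
  # 12003 belongs to some other rsync and 12044 has already exited, so both are ignored.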
  for (my $a=0; $a<$NASRP; $a++) {
    for (my $b=0; $b<$apr; $b++) {
      # if they match, they're MY rsyncs AND they're running
      if ($ASRP[$a] eq $aprPIDs[$b]) { $crrPIDs[$crr++] = $aprPIDs[$b]; }
    }
  }
  # dump @crrPIDs into $rpids
  $rpids = join(" ", @crrPIDs);
  $crr--; # trim off the extra incr

  # now mask out the sPIDs from the rPIDs list; works but ugly!
  $spids =~ s/^\s+|\s+$//g ;
  if ($spids =~ /\d+/) { # if there are any spids
    $NSusPIDs = @SusPIDs = split(/\s+/, $spids);
    for (my $r=0; $r<$NSusPIDs; $r++) {
      for (my $b=0; $b<$apr; $b++) {
        # if a sPID == rPID, delete the PID from the $rPIDs string
        if ( $SusPIDs[$r] eq $aprPIDs[$b]) { $rpids =~ s/$aprPIDs[$b]//g;}
      }
    }
  }
  return ($rpids, $crr);
}

sub getavgnetbw ($$$) {
  # call as (my $avgTCPrecv, $avgTCPsend, $avgRDMArecv, $avgRDMAsend) = getavgnetbw($NETIF, $CHECKPERIOD, $PERFQUERY);
  my ($avgrec,$avgtrans,$R1,$T1,$R2,$T2,$RDMA_T1,$RDMA_T2,$RDMA_R1,$RDMA_R2, $avgRDMAsend,$avgRDMArecv,$PQ);
  $avgRDMAsend = $avgRDMArecv = 0;
  my $NETIF = shift;
  my $CHECKPERIOD = shift;
  my $PQ = shift;
  $R1=`cat /sys/class/net/${NETIF}/statistics/rx_bytes`;
  $T1=`cat /sys/class/net/${NETIF}/statistics/tx_bytes`;
  if ($PQ) {
    $RDMA_T1 = `perfquery -x | grep XmitData | cut -f2 -d: | sed -e 's/\\.*//g'`; chomp $RDMA_T1;
    $RDMA_R1 = `perfquery -x | grep RcvData | cut -f2 -d: | sed -e 's/\\.*//g'`; chomp $RDMA_R1;
  }
  # now sleep
  sleep $CHECKPERIOD;
  $R2=`cat /sys/class/net/${NETIF}/statistics/rx_bytes`;
  $T2=`cat /sys/class/net/${NETIF}/statistics/tx_bytes`;
  if ($PQ) {
    $RDMA_T2 = `perfquery -x | grep XmitData | cut -f2 -d: | sed -e 's/\\.*//g'`; chomp $RDMA_T2;
    $RDMA_R2 = `perfquery -x | grep RcvData | cut -f2 -d: | sed -e 's/\\.*//g'`; chomp $RDMA_R2;
    # print "[$RDMA_T2] - [$RDMA_T1]\n";
    $avgRDMAsend = ( $RDMA_T2 - $RDMA_T1) / $CHECKPERIOD;
    $avgRDMArecv = ( $RDMA_R2 - $RDMA_R1) / $CHECKPERIOD;
  }
  $avgrec   = ($R2 - $R1) / $CHECKPERIOD;
  $avgtrans = ($T2 - $T1) / $CHECKPERIOD;
  # print "getavgnetbw(): avgRDMAsend = $avgRDMAsend\n";
  return ($avgrec, $avgtrans, $avgRDMArecv, $avgRDMAsend);
}

sub pause {
  print "Press [ENTER] to continue.\n";
  my $tmp = <STDIN>;
}

# color info string ($) blue
sub INFO($) {
  my $msg = shift;
  print color('bold blue'); print " INFO: $msg"; print color('reset');
}

# color warning string ($) orange
sub WARN($) {
  my $msg = shift;
  print color('bold magenta'); print " WARN: $msg \n"; print color('reset');
}

# color error string ($) red
sub ERROR($) {
  my $msg = shift;
  print color('bold red'); print " ERROR: $msg \n"; print color('reset');
}

sub FATAL($) {
  my $msg = shift;
  print color('bold red'); print "\n ** FATAL ERROR **: $msg \n\n"; print color('reset');
  exit(1);
}

# call as [debug(__LINE__, "string")] to print line # and debug string
sub debug($$) {
  my $line = shift;
  my $msg = shift;
  print STDERR "DEBUG[$line]: $msg\n";
  pause;
}

# fixfilenames reads in a file of filenames and iterates over them, fixing their
# names and emitting useful warning if something goes odd.
# called like: fixfilenames($CUR_FP_FLE, $ROOTDIR)
# where $CUR_FP_FLE = current fpart file (fqpn)
#       $ROOTDIR = pwd, or where all additional dirs are rooted.
sub fixfilenames {
  my $FN = shift;
  my $startdir = shift;
  $startdir .= '/'; # and suffixed with a '/'
  # print "\nstartdir = $startdir\n";
  my $fpnew = $FN . ".new";
".new"; open (FP, "< $FN") or die "ERROR: Can't open fp file [$FN]\n."; open (FPN, "> $fpnew") or die "ERROR: Can't open replacement file [$fpnew]\n."; my $lc = my $verified = my $failed = 0; while () { chomp; if ($_ =~ / /) { s/ /\ /g; } # subst all spaces with '\ ' s/^$startdir//g; # and also delete off the startdir (Thanks Ken Bass for the missing '^') print FPN "$_\n"; } close FP; close FPN; rename $fpnew, $FN; # and then rename the new one to the original } # ptgmk converts values suffixed with [PpTtGgMmKk] to bytes correctly # uses the 1024 bytes/kb as oppo to 1000 sub ptgmk { my $instr = shift; # trim spaces from back and front $instr =~ s/^\s+|\s+$//g; my $abbr = chop $instr; my $nbr = $instr; if ($abbr !~ /[PpTtGgMmKk]/) {FATAL("ptgmk() input doesn't contain [PpTtGgMmKk], so nothing to convert.");} if ($abbr =~ /[Kk]/) {$nbr *= 1024; return $nbr;} if ($abbr =~ /[Mm]/) {$nbr *= 1048576; return $nbr;} if ($abbr =~ /[Gg]/) {$nbr *= 1073741824; return $nbr;} if ($abbr =~ /[Tt]/) {$nbr *= 1.09951162778e+12; return $nbr;} if ($abbr =~ /[Pp]/) {$nbr *= 1.12589990684e+15; return $nbr;} } sub fix_ssh_config { $HOME = $ENV{"HOME"}; my $append_fxt = 0; if (-e "$HOME/.ssh/config") { # if it exists, fix it. open (CF, "<$HOME/.ssh/config") or FATAL("Can't open $HOME/.ssh/config, even tho it exists.. WTF??"); while () { if ($_ =~ /ForwardX11Trusted\s+yes/i) { $append_fxt = 0;} if ($_ =~ /ForwardX11Trusted\s+no/i) { $append_fxt = 1;} } close CF; } else { $append_fxt = 1;} if ($append_fxt) { INFO("parsyncfp would like to append 'ForwardX11Trusted yes' & 'ForwardX11 yes' to your ~/.ssh/config. Skipping this may result in a lot of odd ssh warnings being emitted during the run if you don't have ssh set correctly for the remote system, but the transfer should still work.) If this mod of your ~/.ssh/config file is OK, hit [Enter]. Otherwise hit [s] to skip.\n "); my $tmp = ; if ($tmp !~ /[sS]/) { system ("echo -n \"#Next 2 lines added by parsyncfp\nForwardX11Trusted yes\nForwardX11 yes\n\" >> $HOME/.ssh/config" ); system("chmod 600 $HOME/.ssh/config"); INFO("Your ~/.ssh/config file is set correctly.\n"); sleep 1; } else {INFO("Your ~/.ssh/config was not changed.\n"); sleep 1;} } } sub usage { #my $parsync_dir = shift; my $helpfile = "$HOME/.parsyncfp/parsyncfp-help.tmp"; if (! -d "$HOME/.parsyncfp") {mkdir "$HOME/.parsyncfp";} open HLP, ">$helpfile" or die "Can't open the temp help file [$helpfile]\n"; my $helptxt = < to create chunkfiles for rsync to read, bypassing the need to wait for a complete recursive scan. ie, it starts the transfer immediately. For large deep trees, this can be useful. It appropriates rsync's bandwidth throttle mechanism, using '--maxbw' as a passthru to rsync's 'bwlimit' option, but divides it by NP so as to keep the total bw the same as the stated limit. It monitors and shows network bandwidth, but can't change the bw allocation mid-job. It can only suspend rsyncs until the load decreases below the cutoff. If you suspend parsyncfp (^Z), all rsync children will suspend as well, regardless of current state. Unless changed by '--interface', it assumes and monitors the routable interface. The transfer will use whatever interface normal routing provides, normally set by the name of the target. It can also be used for non-host-based transfers (between mounted filesystems) but the network bandwidth continues to be (pointlessly) shown. [NB: Between mounted filesystems, parsyncfp sometimes works very poorly for reasons still mysterious. 
In such cases, I recommend the fpsync tool contained in the fpart package above].

It only works on dirs and files that originate from the current dir (or
specified via "--startdir"). You cannot include dirs and files from
discontinuous or higher-level dirs.

parsyncfp also does not use rsync's sophisticated/idiosyncratic treatment of
trailing '/'s to direct where files vs dirs are sent; dirs are treated as dirs
regardless of the trailing '/'.

** the [.parsyncfp] files **
The [.parsyncfp] dir contains the cache dir (fpcache), and the time-stamped
log files, which are NOT overwritten.

** Odd characters in names **
parsyncfp will refuse to transfer some oddly named files (tho it should copy
filenames with spaces fine). Filenames with embedded newlines, DOS EOLs, and
some other odd chars will be recorded in the log files in the [.parsyncfp] dir.

OPTIONS
=======
[i] = integer number
[s] = "quoted string"
[f] = floating point number
( ) = the default if any

--NP|np [i] (sqrt(#CPUs)) .............. number of rsync processes to start
      optimal NP depends on many vars. Try the default and incr as needed
--altcache|ac (~/.parsyncfp) ..... alternative cache dir for placing it on
      another FS or for running multiple parsyncfps simultaneously
--startdir|sd [s] (`pwd`) .................. the directory it starts at(*)
--maxbw [i] (unlimited) ........... in KB/s max bandwidth to use (--bwlimit
      passthru to rsync). maxbw is the total BW to be used, NOT per rsync.
--maxload|ml [f] (NP+2) .......... max system load - if loadavg > maxload,
      an rsync proc will sleep for 10s
--chunksize|cs [s] (10G) .... aggregate size of files allocated to one rsync
      process. Can specify in 'human' terms [100M, 50K, 1T] as well as
      integer bytes.
--rsyncopts|ro [s] ... options passed to rsync as quoted string (CAREFUL!)
      this opt triggers a pause before executing to verify the command(+)
--interface|i [s] ...... network interface to monitor (not use; see above)
      Only SENT bytes are displayed.
--checkperiod|cp [i] (3) ........ sets the period in seconds between updates
--verbose|v [0-3] (2) ....sets chattiness. 3=debug; 2=normal; 1=less; 0=none
      This only affects verbosity post-start; warning & error messages
      will still be printed.
--dispose|d [s] (l) .... what to do with the cache files. (l)eave untouched,
      (c)ompress to a tarball, (d)elete.
--email [s] ..................... email address to send completion message
--nowait ............. for scripting, sleep for a few s instead of pausing
--version ................................. dumps version string and exits
--help ......................................................... this help

(*) you can use globs/regexes with --startdir, but only if you're at that
point in the dir tree. ie: if you're not in the dir where the globs can be
expanded, then the glob will fail. However, explicit dirs can be set from
anywhere if given an existing startdir.

(+) the '--rsyncopts' string can pass any rsync option to all the rsyncs that
will be started. This allows options like '-z' (compression) or
'--exclude-from' to filter out unwanted files. Use any 'delete' options
carefully tho.

Hints & Workarounds
===================
IMPORTANT: rsync '--delete' options will not work with '--rsyncopts' bc the
multiple parallel rsyncs that parsyncfp launches are independent and therefore
don't know about each other (and so cannot exchange info about what should be
deleted or not). Use a final 'rsync --delete' to clean up the transfer if
that's your need.
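For example (the hosts and paths here are illustrative only), a common pattern
is to let parsyncfp move the bulk of the data in parallel and then let a single
trailing rsync reconcile deletions:
  % parsyncfp --NP=4 --startdir=/home/hjm dir1  hjm\@remote:/backups
  % rsync -a --delete /home/hjm/dir1/  hjm\@remote:/backups/dir1/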
If you see an error related to "sh: /usr/bin/ls: Argument list too long", it usually means that fpart has generated a huge list of chunkfiles (10s of 1000s) and 'ls' has trouble processing that many. This is usually due to pointing parsyncfp at a huge filesystem, with millions of files, with a chunksize that's too small (resulting in the above-noted too many chunkfiles). You can either increase the chunksize ('--chunksize=100G) which will result in a smaller number of chunk files to process, or split up the source dirs among multiple parsyncfps (which can be done using the '--altcache' option above). Examples ======== == Good example 1 == % parsyncfp --maxload=5.5 --NP=4 \ --chunksize=\$((1024 * 1024 * 4)) \ --startdir='/home/hjm' dir[123] \ hjm\@remotehost:~/backups where = -"-maxload=5.5" will start suspending rsync instances when the 1m system load gets to 5.5 and then unsuspending them when it goes below it. = "--NP=4" forks 4 instances of rsync = "--chunksize=\$((1024 * 1024 * 4))" sets the chunksize, by multiplication or by explicit size: 4194304 = "--startdir='/home/hjm'" sets the working dir of this operation to '/home/hjm' and dir1 dir2 dir3 are subdirs from '/home/hjm' = the target "hjm\@remotehost:~/backups" is the same target rsync would use It uses 4 instances to rsync dir1 dir2 dir3 to hjm\@remotehost:~/backups == Good example 2 == parsyncfp --checkperiod 6 --NP 3 --interface eth0 --chunksize=87682352 \ --rsyncopts="--exclude='[abc]*'" nacs/fabio hjm\@moo:~/backups The above command shows several options used correctly: --chunksize=87682352 - shows that the chunksize option can be used with explicit integers as well as the human specifiers (TGMK). --rsyncopts="--exclude='[abc]*'" - shows the correct form for excluding files based on regexes (note the quoting) nacs/fabio - shows that you can specify subdirs as well as top-level dirs (as long as the shell is positioned in the dir above, or has been specified via '--startdir' == Good example 3 == parsyncfp -v 1 --nowait --ac pfpcache1 --NP 4 --cp=5 --cs=50M --ro '-az' \ linux-4.8.4 moo:~/test The above command shows: - short version of several options (-v for --verbose, --cp for checkperiod, etc) - shows use of --altcache (--ac pfpcache1), writing to relative dir pfpcache1 - again shows use of --rsyncopts (--ro '-az') indicating 'archive' & compression'. - includes '--nowait' to allow unattended scripting of parsyncfp == Error example 1 == % pwd /home/hjm # executing parsyncfp from here % parsyncfp --NP4 /usr/local /media/backupdisk why this is an error: = '--NP4' is not an option (parsyncfp will say "Unknown option: np4" It should be '--NP=4' or '--NP 4' = if you were trying to rsync '/usr/local' to '/media/backupdisk', it will fail since there is no /home/hjm/usr/local dir to use as a source. This will be shown in the log files in ~/.parsync/rsync-logfile-_# as a spew of "No such file or directory (2)" errors The correct version of the above command is: % parsyncfp --NP=4 --startdir=/usr local /media/backupdisk HELP print HLP $helptxt; close HLP; system("less -S $helpfile"); unlink $helpfile; die "Did that help?. Send suggestions for improvement to \n"; }