#!/usr/bin/perl -w
# Harry Mangalam, UC Irvine, 2007, harry.mangalam@uci.edu, 949 824 0084
# this script is functional but poorly documented.  Please contact me if
# you want better documentation

# this script is explicitly placed in the Public Domain - you can do anything
# you want with it - have fun.

# after the card and RAID type, and # of raid disks have been set by hand,
# this script should automate the running of several tests that will
# describe the performance of the RAID on several levels
# should also make sure in my case to run the test once on the 3ware
# and once on the areca card for each set of tests.
# also, do tests vs one card impact running the same tests on the other card
# test singly and together to see how they behave under load.
# should also reboot with RAM parameter set low (set kernel param mem=512M)
# to see effect on timing

use Getopt::Long;
use Time::localtime;
use strict;

use vars qw(
  @FS_TYPE $filesystem $RAID_DEV $RAID_MNT %FS_OPTS $READAHEAD @BENCHMARK $bm
  $SOURCE_DIR $RAM_SZ $TIMER $HELP $RAID_CARD $RAID_NBR $JRNL_DEV $ktarball
  $kdir $USER $tmp $bonnie @setra $DEBUG $OUTPUT_DIR $NBR_DBENCH_CLIENTS
  $LOGFILE $PIDTIME $DATESTAMP $tm $dbench  $iozone %MKFS_OPTS $hiresfound
  $t0 $elapsed $BASEFILE $IOZONE_TXT $IOZONE_XLS $TIMERFILE $nco  $test $JRNL_ID $YES  $do_tests $ID_TAG $SETRA $FAKE $NBR_DISKS
);

# Probe for Time::HiRes at compile time so that its functions are imported
# when the module is installed; $hiresfound records the outcome (1 = loaded,
# 0 = the string-eval'ed "use" failed).
BEGIN {
    eval "use Time::HiRes qw(usleep ualarm gettimeofday tv_interval)";
    $hiresfound = ($@ eq '') ? 1 : 0;
}
#$hiresfound = 0;  # uncomment to simulate not found

# Tell the user which of the two cases we are in.
my $hires_banner = $hiresfound
    ? "Time::HiRes ... found.\n"
    : "\nOoops! Time::HiRes (needed for accurate timing) not found\nContinuing without timing.";
print $hires_banner;

# ---- Built-in defaults; several can be overridden from the command line ----

# Run identification: a Time::localtime object, a pid+epoch string, and a
# compact date stamp produced by date(1) (day, month name, 2-digit year,
# '+', hour and minute) that is embedded in every output file name.
$tm      = localtime();
$PIDTIME = $$ . time;
chomp($DATESTAMP = `date +%d%B%y+%H%M`);
#$DATESTAMP =~ s/ //g; # and compress all this
#$DATESTAMP =~ s/://g;
#$DATESTAMP =~ s/PDT2007//g;
#$SETRA = 256;

# Behaviour switches.
$do_tests = 1;    # run the benchmark stanzas inside the main loop
$DEBUG    = 0;    # when true, pause() after each step
$FAKE     = 0;    # when true, tests are replaced by a short sleep
$YES      = 0;    # when true, skip the "use this mount point?" question

# Where the test inputs live and where results are written.
$SOURCE_DIR         = '/home/hjm';
$OUTPUT_DIR         = '/home/hjm/bm_logs';
$LOGFILE            = '';
$NBR_DBENCH_CLIENTS = 30;

# "IntJRNL" is the marker for "journal on the RAID device itself";
# --jdev replaces $JRNL_DEV with a real device name.
$JRNL_DEV = 'IntJRNL';
$JRNL_ID  = $JRNL_DEV;

# Kernel tarball used by the kernel-compile test; $kdir is the tarball
# name with its ".tar.bz2" suffix removed.
$ktarball = 'linux-2.6.21.3.tar.bz2';
($kdir = $ktarball) =~ s/\.tar\.bz2$//;

# Names of the benchmarks this script was written around.
@BENCHMARK = qw(bonnie dbench tbench smbtorture iozone nco kcompile);

##########  FILESYSTEMS TO TEST AND RELATED VARIABLES ###############
# Filesystem types to test - no reiser4 or ext4 yet.
@FS_TYPE = qw(xfs reiserfs ext3 jfs);

# Automatic setup of journal parameters is ignored until the rest is working;
# by default the journal lives on the same device as the filesystem.
# Some of these filesystems need the external journal device prepared first:
#      ext3 needs: mke2fs -O journal_dev /dev/sdc4
#       xfs needs: mkfs.xfs -l logdev=/dev/sdb1,size=10000b /dev/sdc4
#  reiserfs needs: mkfs.reiserfs -j /dev/sdc4 (need file as well?)
#       jfs needs: mkfs.jfs -j /dev/sdc4

# Extra command-line options handed to mkfs.<filesystem>, keyed by
# filesystem type.
%MKFS_OPTS = ();
@MKFS_OPTS{qw(ext3 reiserfs jfs xfs)} = ('-F -F', '-f -f', '-q', '-f');

# Command-line parsing.
# The usage text is shown for --help and when GetOptions() rejects the
# command line (unknown option, wrong value type, ...).
my $usage = <<"USAGE";
Usage: $0 --raiddev=DEV --raid=N --disks=N [options]
  --raiddev=DEV   the RAID device to use (required)
  --raid=N        integer id of RAID (0,1,5,6,10) (required)
  --disks=N       number of disks in the RAID (required)
  --raidmnt=DIR   mount point for the RAID
  --card=STR      RAID card string
  --jdev=DEV      the device to use for the journal
  --setra=N       explicitly set the readahead size
  --user=NAME     execute apps as this user if necessary
  --ramsz=N       size of RAM to use on machine
  --timer=STR     TIMER string
  --[no]fake      fake the tests with 'sleep'
  --[no]yes       answer yes to startup questions
  --debug         debugging flag (pause after each step)
  --[no]help      print this help
USAGE

GetOptions("ramsz=i"    => \$RAM_SZ,    # size of RAM to use on machine
           "timer=s"    => \$TIMER,     # TIMER string
           "help!"      => \$HELP,      # print the help
           "raiddev=s"  => \$RAID_DEV,  # the RAID device to use
           "raidmnt=s"  => \$RAID_MNT,  # mount point for the RAID
           "card=s"     => \$RAID_CARD, # raid card string - required
           "raid=i"     => \$RAID_NBR,  # integer id of RAID (0,1,5,6,10)
           "jdev=s"     => \$JRNL_DEV,  # the device to use for journal
           "user=s"     => \$USER,      # execute apps as this user if nec
           "setra=i"    => \$SETRA,     # explicit set the readahead size
           "disks=i"    => \$NBR_DISKS, # passthru info abt # disks in raid
           "fake!"      => \$FAKE,      # fake the tests with 'sleep'
           "yes!"       => \$YES,       # answer yes to startup questions
           "debug"      => \$DEBUG,     # debugging flag
) or die "\nBad command line - see the messages above.\n\n$usage";

# --help was accepted but never acted upon; honour it before doing anything
# that touches the RAID device.
if ($HELP) {
    print $usage;
    exit 0;
}

# Validate the required options and fill in defaults for the optional ones.
if (!defined $RAID_DEV) { die "\nYou MUST specify the RAID device!!\n"; }
if (!defined $RAID_MNT) { $RAID_MNT = "/RAID"; }
if (!defined $TIMER)    { $TIMER = "/usr/bin/time -v"; }

# If no card string was given, fall back to whatever lspci reports.
if (!defined $RAID_CARD) {
    $RAID_CARD = `lspci | grep RAID`;
    $RAID_CARD = '' if !defined $RAID_CARD;
}
# The card string is embedded in output file names and in unquoted shell
# redirections, so squeeze newlines, blanks and punctuation (lspci output has
# all three) down to single underscores.  A plain token such as "3ware"
# passes through unchanged; an empty string stays empty so that the
# "no recognizable RAID card" check further down still fires.
$RAID_CARD =~ s/[^\w.\-]+/_/g;
$RAID_CARD =~ s/^_+//;
$RAID_CARD =~ s/_+$//;

if (!defined $RAID_NBR) {die "You MUST define the RAID number (0,1,5,6,10) with --raid=";}
if (!defined $NBR_DISKS) {die "You MUST define the # of disks being used with --disks=";}
#if (!defined $JRNL_DEV) {$JRNL_DEV = $RAID_DEV;}
if (!defined $USER) {$USER = $ENV{USER};} # execute as me if not set

# Readahead values to test: the single value given with --setra, or the
# whole spectrum when none was given.
if (defined $SETRA) {
    @setra = ($SETRA);
} else {
    @setra = (256, 512, 1024, 2048, 4096, 8192, 16384, 32768); # readahead values
}

# Warn (but carry on) when not running with root privileges.
# The decision is made on the effective UID rather than by pattern-matching
# the user name, and whoami is run only once (its output is used purely for
# the message).
my $who = `whoami`;
$who = '' if !defined $who;
chomp $who;
if ($> != 0) {
    warn "\nWARN: It looks like you're executing this as [$who], not [root](!)\nYou don't have enough permissions to run this fully. Try prefixing the command with 'sudo'\n\n";
}

# Show the RAID card string and refuse to continue without one.
printf "RAIDCARD str = [%s]\n\n", $RAID_CARD;
if (length($RAID_CARD) < 1){ die "You don't have a recognizable RAID card by 'lspci'"; }

# RAID CARDS need to be changed by hand obviously
# RAID volumes w/ # of disks,  have to be set up by hand.
# RAID 0, RAID1, RAID5, RAID6, RAID10
# also need to do this with 2,3,4,5,6,7,8 disks for R0,R5,R6
# Make sure the mount point is usable before any filesystem is created.
#  - If it exists, show its contents and (unless --yes) ask before using it.
#  - If it does not exist, ask (unless --yes) and then really create it.

# Print a question on STDERR and read the answer from STDIN.
# Only an answer starting with 'y' or 'Y' counts as yes; an empty answer,
# anything else, or end-of-file on STDIN counts as no (the default is N).
my $confirm = sub {
    my ($question) = @_;
    print STDERR $question;
    my $reply = <STDIN>;
    return 0 if !defined $reply;
    return ($reply =~ /^\s*y/i) ? 1 : 0;
};

if (-e $RAID_MNT) {
    my $ls = `ls $RAID_MNT`;
    print STDERR "WARN: Test mount point [$RAID_MNT] exists and contains:\n---\n$ls---\n";
    if (!$YES) {
        $confirm->("NOTICE: Should I use it anyway? [N/y]")
            or die "OK - decide what you want to use and re-try - bye";
        print STDERR "OK - using [$RAID_MNT]\n";
    }
} else {
    if ($YES) {
        # --yes answers the startup questions, including this one.
        print STDERR "WARN: RAID MOUNT [$RAID_MNT] doesn't exist.\n";
    } else {
        $confirm->("WARN: RAID MOUNT [$RAID_MNT] doesn't exist.  Create it? [N/y]")
            or die "OK - decide what you want to use and re-try - bye";
    }
    print STDERR "OK - creating [$RAID_MNT]\n";
    # Actually create the directory (list-form system, so no shell quoting
    # problems); the later mount would fail without it.
    system("mkdir", "-p", $RAID_MNT) == 0
        or die "Can't create [$RAID_MNT] - create it manually and re-try.\n";
}

# at this point, check to see if the DB exists, copy it to a backup if so,
# initialize it if it doesn't exist.  Steal code from the file2db script.



# for each filesystem, do the full suite of benchmarks..
# but due to the complexity of each filesystem type, we may NOT want to do this in a loop.
##########################################################################
# Main benchmark loop.
#
# For every readahead value in @setra and every filesystem in @FS_TYPE:
#   1. set the readahead on the RAID device,
#   2. make sure the RAID device (and any external journal device) is
#      unmounted,
#   3. make the filesystem and mount it on $RAID_MNT,
#   4. run the benchmark stanzas, each one timed by time_n_exec(),
#   5. mail a notice that this filesystem run is done.
# Results go to files named after $BASEFILE (.log, .timer, _iozone.xls/.txt).
# $SETRA and $filesystem are deliberately package globals: time_n_exec()
# reads $filesystem while the loop is running.
##########################################################################
if ($JRNL_DEV !~ /IntJRNL/) { $JRNL_ID = "ExtJRNL"; }

foreach $SETRA (@setra) {
    # Apply this readahead value to the RAID device.
    $READAHEAD = "blockdev --setra $SETRA $RAID_DEV";
    print "INFO: Setting [$READAHEAD]\n";
    system "$READAHEAD";

    foreach $filesystem (@FS_TYPE) {
        # Tag and output file names that identify this particular run.
        $ID_TAG   = "$filesystem $RAID_CARD RAID$RAID_NBR  RA$SETRA DISKS$NBR_DISKS $JRNL_ID $DATESTAMP";
        $BASEFILE = join("_", "$OUTPUT_DIR/$filesystem", $RAID_CARD, "RAID$RAID_NBR",
                         "RA$SETRA", "DISKS$NBR_DISKS", $JRNL_ID, $DATESTAMP);
        $LOGFILE    = $BASEFILE . ".log";
        $TIMERFILE  = $BASEFILE . ".timer";
        $IOZONE_XLS = $BASEFILE . "_iozone.xls";
        $IOZONE_TXT = $BASEFILE . "_iozone.txt";

        # TIMER stays a bareword handle because time_n_exec() prints to it.
        open(TIMER, '>', $TIMERFILE) or die " can't open [$TIMERFILE] for writing: $!\n";
        print "\nAbout to test filesystem: [$filesystem], logging to:\n  [$LOGFILE]\n";
        #    system "cd /"; # cd to a non-$RAID_DIR dir

        # If the RAID device is mounted, umount it.  \Q..\E keeps characters
        # in the device name from being read as regex syntax.
        $tmp = `df |grep $RAID_DEV`;
        chomp $tmp;
        if ($tmp =~ /\Q$RAID_DEV\E/) {
            print STDERR "WARN: [$RAID_MNT] is mounted - will try to umount it\n";
            my $result = system "umount $RAID_MNT";
            # Really stop here: the message always said "die", and going on
            # would run a forced mkfs on a device that is still mounted.
            if ($result != 0) { die "Can't unmount [$RAID_MNT] - Do it manually and re-try.\n"; }
        }
        print STDERR "\nNOTICE: Good. RAID filesystem is not mounted.\n";
        if ($DEBUG) {pause("Tested mount of filesystem", __LINE__)}

        # Make the new filesystem on the RAID device.
        #    print "\nsystem: "mkfs.$filesystem $MKFS_OPTS{$filesystem}
        # ie: ext3 needs: mke2fs -O journal_dev /dev/sdc4
        #      xfs needs mkfs.xfs -l logdev=/dev/sdb1,size=10000b /dev/sdc4
        # reiserfs needs mkfs.reiserfs -j /dev/sdc4 (need file as well?)
        #      jfs needs mkfs.jfs -j /dev/sdc4
        if ($JRNL_DEV !~ /IntJRNL/) {    # an external journal was requested
            # make sure that the journal device is unmounted
            $tmp = `df |grep $JRNL_DEV`;
            chomp $tmp;
            if ($tmp =~ /\Q$JRNL_DEV\E/) {
                my $result = system "umount $JRNL_DEV";
                if ($result != 0) { die "Can't unmount [$JRNL_DEV] - Do it manually and re-try.\n"; }
            }
            print STDERR "\nNOTICE: Good. Journal dev is not mounted.\n";

            if ($filesystem =~ /ext3/) {
                #system "mke2fs -O journal_dev $JRNL_DEV $RAID_DEV";
                #system "mkfs.ext3 $MKFS_OPTS{$filesystem} -J device=$JRNL_DEV $RAID_DEV";}
                $test = "mke2fs -O journal_dev $JRNL_DEV && mkfs.ext3 $MKFS_OPTS{$filesystem} -J device=$JRNL_DEV $RAID_DEV";
                time_n_exec("$ID_TAG MKFS", $test);
            } elsif ($filesystem =~ /xfs/) {
                #system "mkfs.xfs $MKFS_OPTS{$filesystem} -l logdev=$JRNL_DEV,size=10000b $RAID_DEV";}
                $test = "mkfs.xfs $MKFS_OPTS{$filesystem} -l logdev=$JRNL_DEV,size=10000b $RAID_DEV";
                time_n_exec("$ID_TAG MKFS", $test);
            } elsif ($filesystem =~ /jfs/) {
                #system "mkfs.jfs  -j $JRNL_DEV $RAID_DEV";
                #$test = "mkfs.jfs $MKFS_OPTS{$filesystem} -j $JRNL_DEV $RAID_DEV";
                #time_n_exec("MKFS.JFS_EXT_JRNL", $test);
                # JFS is not tested with an external journal: skip just this
                # filesystem ('next') instead of abandoning the rest of
                # @FS_TYPE ('last'), so the outcome no longer depends on jfs
                # being the final entry.
                print STDERR "NOTICE: skipping [$filesystem] - not run with an external journal.\n";
                close(TIMER);
                next;
            } elsif ($filesystem =~ /reiserfs/) {
                #system "mkfs.reiserfs  -j $JRNL_DEV $RAID_DEV";
                $test = "mkfs.reiserfs $MKFS_OPTS{$filesystem} -j $JRNL_DEV $RAID_DEV";
                time_n_exec("$ID_TAG MKFS", $test);
            } else {
                die "You wanted an externally journalled filesystem, but the filesystem [$filesystem] doesn't support it.\n";
            }
        } else {
            # for all INTERNAL journal mkfs operations
            $test = "mkfs.$filesystem $MKFS_OPTS{$filesystem} $RAID_DEV";
            time_n_exec("$ID_TAG MKFS", $test);
            # should test for success here before proceeding
            #if ($failed) {die "mkfs.$filesystem seems to have failed at __LINE__ !!!!\n";}
        }
        if ($DEBUG) {pause("mkfs done", __LINE__)}

        # mount the filesystem
        $test = "mount -t $filesystem $RAID_DEV $RAID_MNT";
        # if ($!) {die "mounting $filesystem filesystem seems to have failed at __LINE__ !!!!\n";}
        time_n_exec("$ID_TAG MOUNT", $test);
        if ($DEBUG) {pause("mount with $test",__LINE__)}

        # Write the run header to the log.  This is done from Perl: passed to
        # a shell 'echo', the leading '#' turned the text into a comment and
        # the embedded newlines split it into separate commands, so nothing
        # was ever written.
        my $header = "###########\nraidcard=$RAID_CARD\nraid_level=$RAID_NBR\nfilesystem=$filesystem\njournal=$JRNL_DEV\n";
        if (open(my $log_fh, '>>', $LOGFILE)) {
            print {$log_fh} $header;
            close($log_fh);
        } else {
            warn "WARN: can't append the header to [$LOGFILE]: $!\n";
        }
        #print TIMER $header;
        if ($DEBUG) {
            print STDERR $header;
            pause("Header printed", __LINE__)
        }

        # make the benchmark directory on the new filesystem and chown it to a non-root user
        print STDERR "About to make the [$RAID_MNT/benchmarks] dir \n";
        system "mkdir $RAID_MNT/benchmarks";
        # ':' is the standard owner:group separator; the '.' form is deprecated
        system "chown -R $USER:$USER $RAID_MNT/benchmarks";
        if ($DEBUG) {pause("mkdir  & chown'ed bm dir", __LINE__)}

        ##################################################################
        ############ and start the benchmark tests #######################
        ##################################################################

        if ($do_tests) {

            ### bonnie++ tests - -r flag set to 400 or 0 (to prevent memchecking )
            $test = "bonnie++ -d $RAID_MNT/benchmarks -r 400 -s 2g -n 3 -u $USER  >> $LOGFILE";
            if ($FAKE) {$test = "sleep 3.4s"}
            print STDERR "TEST string: $test \n";
            time_n_exec("$ID_TAG BONNIE", $test);
            if ($DEBUG) {pause("bonnie",__LINE__)}

            # dbench is halting on jfs and is not providing a whole lot of info, so will
            # eliminate it for now.
            #     ### dbench see: http://samba.org/ftp/tridge/dbench/
            #         $test = "dbench -t 120 -D $RAID_MNT/benchmarks $NBR_DBENCH_CLIENTS >> $LOGFILE";
            #         if ($FAKE) {$test = "sleep 3.4s"}
            #         #$test = "ls -lS /var >> $LOGFILE";
            #         print STDERR "TEST string: $test \n";
            #         time_n_exec("$ID_TAG DBENCH", $test);
            #         if ($DEBUG) {pause("dbench",__LINE__)}

            ### tbench - only tests network side of smb connections - not really useful here
            #system "echo \"===\nTBENCH\n\" >> $LOGFILE ";
            #if ($FAKE) {$test = "sleep 3.4s"}
            #print STDERR "TEST string: $test \n";
            #time_n_exec("$ID_TAG TBENCH", $test);
            #if ($DEBUG) {pause("tbench", __LINE__)}

            ### smbtorture - this requires an smbserver running as well -
            ### might not get this running for this test.
            #system "echo \"===\nSMBTORTURE\n\" >> $LOGFILE ";
            #$test = "smbtorture yadda yadda";
            #if ($FAKE) {$test = "sleep 3.4s"}
            #print STDERR "TEST string: $test \n";
            #time_n_exec("$ID_TAG SMBTORTURE", $test);
            #if ($DEBUG) {pause("smbtorute",__LINE__)}

            ### iozone see: www.iozone.org for code and info
            # the text output goes to $IOZONE_TXT, the spreadsheet to $IOZONE_XLS
            $test = "iozone -a -b $IOZONE_XLS -f $RAID_MNT/iozone.tmp >> $IOZONE_TXT ";
            if ($FAKE) {$test = "sleep 3.4s"}
            print STDERR "TEST string: $test \n";
            time_n_exec("$ID_TAG IOZONE", $test);
            if ($DEBUG) {pause("iozone",__LINE__)}

            ### nco - see: nco.sourceforge.net for latest package and info
            # Copy one big netCDF file over, replicate it four times, then
            # concatenate the five copies with ncecat.  The '&&' in front of
            # ncecat was missing, which glued its path onto the last cp
            # argument ("04.nc/usr/local/bin/ncecat").
            $test  = "cp $SOURCE_DIR/big.nc $RAID_MNT/benchmarks/00.nc; ";
            $test .= "cd $RAID_MNT/benchmarks; ";
            $test .= "cp 00.nc 01.nc && cp 00.nc 02.nc && cp 00.nc 03.nc && cp 00.nc 04.nc && ";
            $test .= "/usr/local/bin/ncecat 00.nc  01.nc  02.nc  03.nc  04.nc all.nc";
            #if ($FAKE) {$test = "sleep 3.4s"}
            time_n_exec("$ID_TAG NCECAT", $test);
            if ($DEBUG) {pause("nco",__LINE__)}

            # The following stanza was a good general purpose test but in doing so, does a bunch of
            # mixed file operations, so it's not as good for the actual NCO test (large file
            # operations), so I've commented it out here.  If you want to re-inject it, just
            # uncomment.
            # First we cp over the entire (cleaned) nco src tree; run the top-level script that
            # descends into each of hdf, netcdf, nco and makes the tree, then runs the nco benchmarks
            # using the already compiled libs already installed.
            #$test = "cp -a $SOURCE_DIR/nco_src $RAID_MNT/ && cd $RAID_MNT/nco_src && ./run_nco_bm.sh";
            # need to make a nice run_nco_bm.sh script now
            #$test = "ls -lS /usr/bin >> $LOGFILE";
            #time_n_exec("$ID_TAG NCO", $test);
            #if ($DEBUG) {pause("nco",__LINE__)}

            ### kcompile The std kernel compile operation incl copying, untarring, and then
            # making the bzImage and all the modules, using a std .config
            $test  = "mkdir -p $RAID_MNT/kernel && cd $RAID_MNT/kernel; ";
            $test .= "cp $SOURCE_DIR/$ktarball . ; ";
            $test .= "tar -xjf $ktarball && cd $kdir; ";
            $test .= "cp $SOURCE_DIR/kernel.config ./.config; ";
            $test .= "make -j8 bzImage && make -j8 modules ;";
            if ($FAKE) {$test = "sleep 3.4s"}
            print STDERR "Compile string = [$test] \n";
            time_n_exec("$ID_TAG KERNEL", $test);
            if ($DEBUG) {pause("kernel compile",__LINE__)}

        }

        # and mail me when each filesystem run is done
        # this part obviously depends on mutt being installed and configured correctly
        #my $mesg = "Benchmark for [$filesystem] is done \@ $DATESTAMP";
        system "su -c \"/bin/echo $ID_TAG  | /usr/bin/mutt -s \'$filesystem\' harry.mangalam\@uci.edu \" hjm";
        #system "/bin/echo $ID_TAG  | /usr/bin/mutt -s \" bs $filesystem\" hjm\@tacgi.com";
        if ($DEBUG) {pause("Mailed hjm",__LINE__);}

        # Flush and release this run's timer file before the next one is opened.
        close(TIMER);
    }
}

# this should be the end.. no?

# Should we combine the log and timer files?  Can still grep out the bits we need
# this needs the bits to store the results in a SQLite db.
# Run one shell command, note it in the log, and record how long it took.
#
# Arguments:
#   $title - label for this step; written to the log and used as the key of
#            the "title=seconds" line in the timer file
#   $test  - shell command line, executed with system()
#
# Uses the file-level globals $filesystem, $LOGFILE and $hiresfound, and
# prints to the TIMER handle, which the caller must already have opened.
# A non-zero status from the command is reported with warn(); the run goes on.
# The return value is that of the final print and is not used by callers.
sub time_n_exec {
    my ($title, $test) = @_;

    # Append the step header from Perl instead of through a shell 'echo', so
    # quotes, '$' or backticks inside $test (and odd characters in $LOGFILE)
    # cannot be reinterpreted by the shell.
    if (open(my $log_fh, '>>', $LOGFILE)) {
        print {$log_fh} "#####\n$filesystem : $title\n$test\n\n";
        close($log_fh);
    } else {
        warn "WARN: can't append to [$LOGFILE]: $!\n";
    }

    my ($status, $elapsed);
    if ($hiresfound) {
        my $t0 = [gettimeofday()];
        $status  = system "$test";    # the command itself redirects its stdout
        $elapsed = tv_interval($t0, [gettimeofday()]);
    } else {
        # Time::HiRes could not be loaded: fall back to whole-second timing
        # rather than dying on the undefined gettimeofday().
        my $t0 = time;
        $status  = system "$test";
        $elapsed = time - $t0;
    }

    if ($status != 0) {
        warn "WARN: [$title] command returned non-zero wait status [$status]\n";
    }

    #  write timer data to db table.  This will write the overall timer data to a SUMMARY
    # table.  To write the finer grained data to tables, need to do some more extravagant
    # grepping and splitting in the individual stanzas above.
    #pause("Mounted filesystem?",__LINE__);
    print TIMER "$title=$elapsed\n\n";
}

# Debugging aid: show a message and block until a line is read from STDIN.
#
# Arguments:
#   $comment - text describing the step that has just finished
#   $line    - line number to report (callers pass __LINE__)
#
# The line that was read is stored in the global $tmp.
sub pause {
    my ($comment, $line) = @_;

    print "\n[$comment @ line $line 'Enter' to continue]\n";
    $tmp = <STDIN>;
}