#!/usr/bin/perl -w # # kdirstat-cache-writer - script to write KDirStat cache files from cron jobs # # From V2.5.1 on, KDirStat can read its information from cache files. # This is a lot faster than reading all the directories in a directory tree and # obtaining detailed information (size, type, last modification time) for each # file and directory with the opendir() / readdir() and lstat() system calls # for each individual file and directory. # # KDirStat can also write those cache files ("Write Cache File..." from the # "File" menu), but the whole point of cache files is being able to do that in # the background when the user does not have to wait for it - like in a cron # job running in the middle of the night. KDirStat itself cannot be used to do # that because it is a KDE program and thus an X program that needs access to # an X display - which cron does not provide. # # This is what this Perl script is for. # # Usage: # kdirstat-cache-writer [-lvdh] [] # # If not specified, defaults to ".kdirstat.cache.gz" # in . # # If ends with ".gz", it will be compressed with gzip. # kdirstat can read gzipped and plain text cache files. # # -l long format - always add full path, even for plain files # -m scan mounted file systems (cross file system boundaries) # -v verbose # -d debug # -h help (usage message) # # Author: Stefan Hundhammer # Updated: 2006-01-02 # TO DO: # # - ensure to use UTF-8 use strict; use English; use Getopt::Std; use Fcntl ':mode'; use Encode; use URI::Escape qw(uri_escape); use vars qw( $opt_l $opt_m $opt_v $opt_d $opt_h ); # Forward declarations. sub main(); # Global variables. my $long_format = 0; my $scan_mounted = 0; my $verbose = 0; my $debug = 0; my $default_cache_file_name = ".kdirstat.cache.gz"; my $toplevel_device = undef; my $unsafe_chars = "\x00-\x20%"; # Call the main function and exit. # DO NOT enter any other code outside a sub - # any variables would otherwise be global. main(); exit 0; #----------------------------------------------------------------------------- sub main() { # Extract command line options. # This will set a variable opt_? for any option, # e.g. opt_v if option '-v' is passed on the command line. getopts('lmvdh'); usage() if $opt_h; $long_format = 1 if $opt_l; $scan_mounted = 1 if $opt_m; $verbose = 1 if $opt_v; $debug = 1 if $opt_d; # One or two parameters are required # (yes, Perl does weird counting) usage() if $#ARGV < 0 || $#ARGV > 1; my $toplevel_dir = shift @ARGV; $toplevel_dir = absolute_path( $toplevel_dir ); my $cache_file_name; if ( $#ARGV < 0 ) # No more command line arguments? { $cache_file_name = $toplevel_dir . "/" . $default_cache_file_name; } else { $cache_file_name = shift @ARGV; } write_cache_file( $toplevel_dir, $cache_file_name ); compress_file( $cache_file_name ); } #----------------------------------------------------------------------------- # Write a KDirStat cache. # # Parameters: # $toplevel_dir # $cache_file_name sub write_cache_file() { my ( $toplevel_dir, $cache_file_name ) = @_; my $start_time = time(); open( CACHE, ">" . $cache_file_name ) or die "Can't open $cache_file_name"; binmode( CACHE, ":bytes" ); write_cache_header(); write_cache_tree( $toplevel_dir ); my $elapsed = time() - $start_time; my ( $sec, $min, $hours ) = gmtime( $elapsed ); printf CACHE "# Elapsed time: %d:%02d:%02d\n", $hours, $min, $sec; close( CACHE ); } #----------------------------------------------------------------------------- # Compress a file if its extension is ".gz". # # Parameters: # $file_name sub compress_file() { my ( $file_name ) = @_; if ( $file_name =~ /.*\.gz$/ ) { my $uncompressed_name = $file_name; $uncompressed_name =~ s/\.gz$//; # Cut off ".gz" extension rename( $file_name, $uncompressed_name ); logf( "Compressing $file_name" ); system( "gzip $uncompressed_name" ); } } #----------------------------------------------------------------------------- # Write the cache file header # # Parameters: # --- sub write_cache_header() { print CACHE <<'EOF'; [kdirstat 2.5.1 cache file] # Generated by kdirstat-cache-writer # Do not edit! # # Type path size mtime EOF } #----------------------------------------------------------------------------- # Write cache entries for a directory tree. # # Parameters: # $dir Starting directory sub write_cache_tree($); # Need prototype for calling recursively sub write_cache_tree($) { my ( $dir ) = @_; logf( "Reading $dir" ); my @files; my @subdirs; my $success = opendir( DIR, $dir ); if ( ! $success ) { my $msg = "Can't open $dir: $ERRNO\n"; print CACHE "# $msg\n"; logf( $msg ); return; } my $entry; while ( $entry = readdir( DIR ) ) { if ( $entry ne "." and $entry ne ".." ) { my $full_path = $dir . "/" . $entry; if ( -d $full_path && ! -l $full_path ) { push @subdirs, $entry; } else { push @files, $entry; } } } closedir( DIR ); if ( write_dir_entry( $dir ) ) { my $file; foreach $file ( @files ) { write_file_entry( $dir, $file ); } my $subdir; foreach $subdir ( @subdirs ) { write_cache_tree( $dir . "/" . $subdir ); } } } #----------------------------------------------------------------------------- # Write a cache entry for a directory. # # If the device of this directory is not the same as the toplevel device # (i.e., if this is a mount point and thus file system boundaries would be # crossed) only a comment line is written and an error value '0' is returned # unless the "-m" command line option was used. # # Parameters: # $dir directory # # Return value: # 1 OK to continue # 0 don't continue, file system boundary would be crossed sub write_dir_entry() { my ( $dir ) = @_; my ( $dev, $ino, $mode, $links, $uid, $gid, $rdev, $size, $atime, $mtime, $ctime, $blksize, $blocks ) = lstat( $dir ); if ( ! defined( $toplevel_device ) ) { $toplevel_device = $dev; } if ( $dev == $toplevel_device || $scan_mounted ) { $dir = uri_escape( $dir, $unsafe_chars ); print CACHE "D $dir"; print CACHE "\t$size"; printf CACHE "\t0x%x\n", $mtime; return 1; } else { my $msg = "Not crossing mount point $dir (use -m to override)"; print CACHE "# $msg\n"; logf( $msg ); } } #----------------------------------------------------------------------------- # Write a cache entry for a plain file (or other non-directory i-node) # # Parameters: # $dir directory # $name file name (without path) sub write_file_entry() { my ( $dir, $name ) = @_; my ( $dev, $ino, $mode, $links, $uid, $gid, $rdev, $size, $atime, $mtime, $ctime, $blksize, $blocks ) = lstat( $dir . "/" . $name ); my $type = "F"; if ( S_ISREG ( $mode ) ) { $type = "F"; } elsif ( S_ISLNK ( $mode ) ) { $type = "L"; } elsif ( S_ISBLK ( $mode ) ) { $type = "BlockDev"; } elsif ( S_ISCHR ( $mode ) ) { $type = "CharDev"; } elsif ( S_ISFIFO( $mode ) ) { $type = "FIFO"; } elsif ( S_ISSOCK( $mode ) ) { $type = "Socket"; } print CACHE "$type"; $name = uri_escape( $name, $unsafe_chars ); if ( $long_format ) { $dir = uri_escape( $dir, $unsafe_chars ); print CACHE " $dir/$name"; } else { print CACHE "\t$name"; } print CACHE "\t$size"; printf CACHE "\t0x%x", $mtime; print CACHE "\tblocks: $blocks" if $blocks > 0 && $blocks * 512 < $size; # Sparse file? print CACHE "\tlinks: $links" if $links > 1; print CACHE "\n"; } #----------------------------------------------------------------------------- # Make an absolute path of a possible relative path. # # Parameters: # $dir relative or absolute path # # Return value: # absolute path sub absolute_path() { my ( $dir ) = @_; return $dir if ( $dir =~ '^/' ); my $save_dir = $ENV{'PWD'}; chdir( $dir ); $dir = $ENV{'PWD'}; chdir $save_dir; return $dir; } #----------------------------------------------------------------------------- # Log a message to stdout if verbose mode is set # (command line option '-v'). # # Parameters: # Messages to write (any number). sub logf() { my $msg; if ( $verbose ) { foreach $msg( @_ ) { print $msg . " "; } $OUTPUT_AUTOFLUSH = 1; # inhibit buffering print "\n"; } } #----------------------------------------------------------------------------- # Log a debugging message to stdout if debug mode is set # (command line option '-d'). # # Parameters: # Messages to write (any number). sub deb() { my $msg; if ( $debug ) { foreach $msg( @_ ) { print $msg . " "; } $OUTPUT_AUTOFLUSH = 1; # inhibit buffering print "\n"; } } #----------------------------------------------------------------------------- # Print usage message and abort program. # # Parameters: # --- sub usage() { die <<"USAGE-END"; kdirstat-cache-writer - script to write KDirStat cache files from cron jobs From V2.5.1 on, KDirStat can read its information from cache files. This is a lot faster than reading all the directories in a directory tree and obtaining detailed information (size, type, last modification time) for each file and directory with the opendir() / readdir() and lstat() system calls for each individual file and directory. KDirStat can also write those cache files (\"Write Cache File...\" from the \"File\" menu), but the whole point of cache files is being able to do that in the background when the user does not have to wait for it - like in a cron job running in the middle of the night. KDirStat itself cannot be used to do that because it is a KDE program and thus an X program that needs access to an X display - which cron does not provide. This is what this Perl script is for. Usage: $0 [-ldvh] [] If not specified, defaults to \"$default_cache_file_name\" in . If ends with \".gz\", it will be compressed with gzip. kdirstat can read gzipped and plain text cache files. -l long format - always add full path, even for plain files -m scan mounted file systems (cross file system boundaries) -v verbose -d debug -h help (this usage message) USAGE-END } # EOF