git_consistency_check.pl (7584B)
#!/usr/bin/env perl
use strict;
use warnings;
use File::Temp qw/ tempfile /;
use Fcntl ':flock';

# Single-instance guard: take a non-blocking exclusive lock on this script
# file itself. A second concurrent invocation fails to get the lock and dies.
open my $self, '<', $0 or die "Couldn't open self: $!";
flock $self, LOCK_EX | LOCK_NB or die "This script is already running! Aborting!";

#EXPLAINER:
# Compare a Git repository (source of truth) to a deployed directory.
#
# Backstory:
#   This was created because it was common for folk to add hotfixes in
#   production directly, which then got overwritten by git deployments and
#   caused production incidents. This script was a first-stage necessity in
#   order to clean up, before being able to enforce a strict
#   "deployment via git only" rule.
#
# Recursively checks for:
#   - Files that exist in one location but not the other
#   - Files whose contents differ (line-by-line comparison)
#
# Useful for:
#   - Verifying that a deployment matches the committed source
#   - Catching out-of-band edits on production servers
#   - Validating integrity of scripts, configs, and assets
#
# Originally written in bash, but it got too unreadable for the job.
# Rewritten in Perl for better sanity and maintainability.
#
# !! Not intended for projects with compiled binaries (hash mismatches are expected)
#
# Output (all in the current working directory):
#   ./file_diffs.txt     files present in only one of the two trees (appended to)
#   ./changed_files.txt  files present in both trees but with different content
#                        (appended to)
#   ./file_list.txt      work list of files present in both trees (reset each run)
#   ./<path>XXXXXXXX.tmp.gz  gzipped per-file mismatch report
#
#USAGE perl <THIS SCRIPT> <GIT REPO PATH> <DEPLOYMENT PATH>

die "Usage: $0 DIRECTORY1 DIRECTORY2\n" if scalar(@ARGV) < 2;

# Fail early on a mistyped path instead of reporting every file as missing.
for my $dir (@ARGV[0, 1]) {
    die "Not a directory: $dir\n" unless -d $dir;
}

my $git_locations    = $ARGV[0];
my $script_locations = $ARGV[1];

my $file_diffs    = "./file_diffs.txt";
my $file_list     = "./file_list.txt";
my $changed_files = "./changed_files.txt";

# Report names that have already been gzipped, so that a file compared more
# than once does not produce a second archive.
my %processed;

my $stem1 = $git_locations;
my $stem2 = $script_locations;

# Scratch files holding the raw file listing of each tree. UNLINK => 1 lets
# File::Temp remove them at exit so they do not pile up in /tmp.
my ($git, $git_files) = tempfile('/tmp/gitrepo-git-consistency-check-XXXXXXXXXXXX', SUFFIX => '.tmp', UNLINK => 1);
my ($scripts, $scripts_files) = tempfile('/tmp/scripts-git-consistency-check-XXXXXXXXXXXX', SUFFIX => '.tmp', UNLINK => 1);

# Remove a root directory prefix from a path.
#   $path  - path as written by find_files
#   @stems - candidate root directories (with or without trailing slash)
# Returns the path relative to the first stem that is a true directory prefix
# of $path (no leading slash), or $path unchanged when none matches.
# Longer stems are tried first and the stem must be followed by a slash, so
# "/srv/app" is never stripped from "/srv/app-deploy/x".
sub strip_stem {
    my ($path, @stems) = @_;
    for my $stem (sort { length($b) <=> length($a) } @stems) {
        (my $base = $stem) =~ s{/+\z}{};
        return $1 if $path =~ m{\A\Q$base\E/+(.*)\z}s;
    }
    return $path;
}

# Join a root directory and a relative path with exactly one slash between
# them, whatever trailing/leading slashes the two parts carry.
sub join_path {
    my ($stem, $rel) = @_;
    (my $base = $stem) =~ s{/+\z}{};
    $rel =~ s{\A/+}{};
    return $base . '/' . $rel;
}

#recursively search through target directory for all files
#   $path   - directory to scan
#   $output - open filehandle; one full path per line is printed to it
# Any further arguments are accepted and ignored (older callers passed an
# unused array).
# Entries whose name starts with a dot are skipped, which matches what the
# previous glob('*') based scan did. readdir is used instead of glob so that
# directory names containing spaces or glob metacharacters are handled.
sub find_files {
    my ($path, $output) = @_;
    $path .= '/' if $path !~ m{/\z};

    my $dh;
    if (!opendir($dh, $path)) {
        # Best effort: an unreadable directory is reported and skipped.
        warn "Cannot read directory " . $path . ": $!\n";
        return;
    }
    my @entries = grep { !/\A\./ } readdir $dh;
    closedir $dh;

    for my $entry (sort @entries) {
        my $file = $path . $entry;
        if (-d $file) {
            find_files($file, $output);
        }
        else {
            print {$output} $file . "\n";
        }
    }
    return;
}

#take two lists of files, remove the stems of the path, and compare differences between the lists
#   $_[0] - listing file whose entries are being classified; its name must
#           contain "scripts" or "gitrepo", which is used as the label
#   $_[1] - listing file to compare against
#   $_[2] - report file; entries found only in $_[0] are appended to it
#   $_[3], $_[4] - the two root directories to strip from the listed paths
# Entries found in both listings are appended to the file-scope $file_list.
sub list_non_present_files {
    my ($list1, $list2, $file_diffs, $stem1, $stem2) = @_;

    # Relative paths present in the "other" listing.
    my %seen;
    open(my $other, "<", $list2) or die "Cannot open file " . $list2 . " for reading: $!";
    while (my $line = <$other>) {
        chomp $line;
        $seen{ strip_stem($line, $stem1, $stem2) }++;
    }
    close($other) or die "Could not finish reading from " . $list2 . ": $!";

    # Label for the report, taken from the listing file's name.
    my ($source_name) = $list1 =~ /(scripts|gitrepo)/;
    $source_name = $list1 unless defined $source_name;

    my %in_both;
    my %only_here;
    open(my $mine, "<", $list1) or die "Cannot open file " . $list1 . " for reading: $!";
    while (my $line = <$mine>) {
        chomp $line;
        my $rel = strip_stem($line, $stem1, $stem2);
        next if $rel eq "";
        if ($seen{$rel}) {
            $in_both{$rel}++;
        }
        else {
            $only_here{$rel}++;
        }
    }
    close($mine) or die "Could not finish reading from " . $list1 . ": $!";

    open(my $diffs, '>>', $file_diffs) or die "Cannot open file " . $file_diffs . " for writing: $!";
    print {$diffs} "Only in " . $source_name . " folder: \n";
    print {$diffs} "$_\n" for sort keys %only_here;
    print {$diffs} "\n";
    close($diffs) or die "Could not finish writing to " . $file_diffs . ": $!";

    open(my $flist, '>>', $file_list) or die "Cannot open file " . $file_list . " for writing: $!";
    print {$flist} "$_\n" for sort keys %in_both;
    close($flist) or die "Could not finish writing to " . $file_list . ": $!";
    return;
}

# Compare two files line by line.
#   $_[0], $_[1] - paths of the two files
# Returns "" when either path is not a regular file or when the files are
# identical; otherwise returns the path of the larger of the two files (the
# first one when sizes are equal).
# On a mismatch a report listing every differing line number and both
# versions of the line is written to ./<path-with-dashes>XXXXXXXX.tmp and
# gzipped. Lines that exist in only one file are reported too.
sub compare_files {
    my ($file1, $file2) = @_;

    #skip anything that isn't a normal file
    return "" unless -f $file1;
    return "" unless -f $file2;

    # Keep the larger file first; this fixes the order in the report and the
    # returned path.
    my $filesize1 = (-s $file1) || 0;
    my $filesize2 = (-s $file2) || 0;
    if ($filesize2 > $filesize1) {
        ($file1, $file2) = ($file2, $file1);
    }

    # Flatten the path into a single file name for the report.
    my $fname = $file1;
    $fname =~ s/^\///;
    $fname =~ s/\//-/g;

    open(my $in1, "<", $file1) or die "Cannot open file " . $file1 . " for reading: $!";
    open(my $in2, "<", $file2) or die "Cannot open file " . $file2 . " for reading: $!";

    my ($co, $current_output);
    my $lineno      = 0;
    my $is_mismatch = 0;

    # Read both files in lock step until BOTH are exhausted, so extra lines at
    # the end of either file are detected.
    while (1) {
        my $line1 = <$in1>;
        my $line2 = <$in2>;
        last unless defined $line1 or defined $line2;
        ++$lineno;
        next if defined $line1 and defined $line2 and $line1 eq $line2;

        if (!$is_mismatch) {
            # The report file is only created once a difference is found.
            ($co, $current_output) = tempfile('./' . $fname . 'XXXXXXXX', SUFFIX => '.tmp', UNLINK => 1);
            print {$co} "Mismatch between files: \n" . $file1 . "\n" . $file2 . "\nPlease check:\n";
            $is_mismatch = 1;
        }

        print {$co} "line :" . $lineno . "\n";
        for my $text ($line1, $line2) {
            my $shown = defined $text ? $text : "<no such line>";
            $shown .= "\n" unless $shown =~ /\n\z/;
            print {$co} $shown;
        }
    }

    close $in1 or die "Cannot close file: " . $file1 . ": $!";
    close $in2 or die "Cannot close file: " . $file2 . ": $!";

    return "" unless $is_mismatch;

    # Close (and thereby flush) the report before handing it to gzip.
    close $co or die "Cannot close file: " . $current_output . ": $!";

    if (!exists $processed{$fname}) {
        # List form: the file name never passes through a shell.
        system('gzip', $current_output) == 0
            or warn "gzip failed for " . $current_output . " (status $?)\n";
        $processed{$fname}++;
    }

    return $file1;
}

# Compare every file named in a list in both trees and report the ones that differ.
#   $_[0] - file with one relative path per line
#   $_[1], $_[2] - the two root directories
#   $_[3] - report file; the relative paths of differing files are appended
# Paths ending in .jar or .gz are skipped. A path listed more than once is
# compared only once.
sub read_list {
    my ($list, $stem1, $stem2, $changed_files) = @_;

    my %changed_file_list;
    my %already_compared;

    open(my $ll, '<', $list) or die "Cannot open file " . $list . " for reading: $!";
    while (my $line = <$ll>) {
        chomp $line;
        next if $line eq "";
        next if $line =~ /\.jar$|\.gz$/;
        next if $already_compared{$line}++;

        my $current_file = compare_files(join_path($stem1, $line), join_path($stem2, $line));
        next if $current_file eq "";
        $changed_file_list{ strip_stem($current_file, $stem1, $stem2) }++;
    }
    close $ll or die "Cannot close file: " . $list . ": $!";

    open(my $chflist, '>>', $changed_files) or die "Cannot open file " . $changed_files . " for writing: $!";
    print {$chflist} "The following files have differences between git repo and deployment: \n";
    print {$chflist} "$_\n" for sort keys %changed_file_list;
    print {$chflist} "\n";
    close($chflist) or die "Cannot close file: " . $changed_files . ": $!";
    return;
}

# The list of common files is a work file: start every run from an empty one
# so entries from an earlier run are not compared again.
open(my $reset, '>', $file_list) or die "Cannot open file " . $file_list . " for writing: $!";
close($reset) or die "Cannot close file: " . $file_list . ": $!";

#locate all the files in the git repo and in the deployment directory
find_files($git_locations, $git);
find_files($script_locations, $scripts);

#close the listings so that everything is flushed to disk before they are read back
close $git or die "Cannot close file: $!";
close $scripts or die "Cannot close file: $!";

#Figure out which files exist only in one or the other dir structure, and print the list of differences
list_non_present_files($scripts_files, $git_files, $file_diffs, $stem1, $stem2);
list_non_present_files($git_files, $scripts_files, $file_diffs, $stem1, $stem2);

#then for the files that exist in BOTH directory structures alike, analyze the differences between all the files
read_list($file_list, $stem1, $stem2, $changed_files);