git_consistency_check.pl (7584B)
#!/usr/bin/env perl
use strict;
use warnings;
use File::Temp qw/ tempfile /;
use Fcntl ':flock';
# Single-instance guard: take a non-blocking exclusive lock on this script's
# own file. $self has to stay open for as long as the script runs, because the
# lock is tied to the handle; a second copy started meanwhile fails the flock
# call and aborts immediately instead of waiting.
open(my $self, '<', $0)
    or die "Couldn't open self: $!";
flock($self, LOCK_EX | LOCK_NB)
    or die "This script is already running! Aborting!";
#EXPLAINER:
# Compare a Git repository (source of truth) to a deployed directory.
#
# Backstory:
# This was created because people commonly applied hotfixes directly in production.
# Those hotfixes were then overwritten by the next git deployment, causing production
# incidents. This script was a necessary first step: it finds the out-of-band changes
# so they can be cleaned up before a strict "deploy via git only" rule is enforced.
#
# Recursively checks for:
# - Files that exist in one location but not the other
# - Files that exist in both locations but whose contents differ (compared line by line)
#
# Useful for:
# - Verifying that a deployment matches the committed source
# - Catching out-of-band edits on production servers
# - Validating integrity of scripts, configs, and assets
#
# Originally written in bash, but it got too unreadable for the job.
# Rewritten in Perl for better sanity and maintainability.
#
# !! Not intended for projects with compiled binaries (content mismatches are expected)
#USAGE perl <THIS SCRIPT> <GIT REPO PATH> <DEPLOYMENT PATH>
# --- Argument handling and run-wide state -----------------------------------
die "Usage: $0 DIRECTORY1 DIRECTORY2\n" if scalar(@ARGV) < 2;
my $git_locations = $ARGV[0];
my $script_locations = $ARGV[1];

# Both arguments must be existing directories; otherwise the scan would yield
# an empty listing and every file would be reported as "only in" the other side.
# Each path is then normalised to end in exactly one '/'. The relative names
# are produced by cutting the stem off the front of each listed path, so
# "repo" and "repo/" must not yield "/foo" on one side and "foo" on the other.
for my $dir ($git_locations, $script_locations) {
    die "Not a directory: $dir\n" unless -d $dir;
    $dir =~ s{/*\z}{/};    # loop variable aliases the real variable
}

# Third argument handed to find_files; that sub never reads it.
my @git_found_files;
my @scripts_found_files;

# Output files, all written in append mode by the subs below.
my $file_diffs="./file_diffs.txt";        # report: files present on one side only
my $file_list="./file_list.txt";          # intermediate: files present on both sides
my $changed_files="./changed_files.txt";  # report: files whose contents differ

# Start from a clean slate. Without this, results from earlier runs stay in
# the reports and stale entries in file_list.txt are compared again.
for my $stale ($file_diffs, $file_list, $changed_files) {
    next unless -e $stale;
    unlink $stale or die "Cannot remove stale output file $stale: $!\n";
}

# Remembers which per-file diff reports compare_files has already archived.
my %processed;

# Prefixes that are stripped from / prepended to the relative file names.
my $stem1 = $git_locations;
my $stem2 = $script_locations;

# Scratch listings of every file found on each side. The words "gitrepo" and
# "scripts" in the names are significant: list_non_present_files derives the
# report heading from them. UNLINK => 1 removes the files when the script
# exits so repeated runs do not litter /tmp.
my ($git, $git_files) = tempfile('/tmp/gitrepo-git-consistency-check-XXXXXXXXXXXX', SUFFIX => '.tmp', UNLINK => 1);
my ($scripts, $scripts_files) = tempfile('/tmp/scripts-git-consistency-check-XXXXXXXXXXXX', SUFFIX => '.tmp', UNLINK => 1);
# Recursively walk a directory tree and write the path of every non-directory
# entry to an already-open output handle, one path per line.
#
# Arguments:
#   $_[0]  directory to scan (a trailing '/' is appended when missing)
#   $_[1]  writable filehandle that receives the paths
#   $_[2]  accepted for backward compatibility; never read
#
# Entries whose name starts with '.' are skipped, exactly as the previous
# glob('*') based walk did (this also keeps the scan out of .git).
# The directory is read with opendir/readdir rather than glob(), because
# glob() splits its pattern on whitespace and expands shell metacharacters,
# which produced bogus paths for any directory name containing a space,
# bracket or brace.
# -d follows symbolic links, so a symlinked directory is descended into.
# A directory that cannot be opened is reported on STDERR and skipped.
sub find_files {
    my ($path, $output) = @_;
    $path .= '/' if $path !~ m{/\z};

    my $dh;
    if (!opendir($dh, $path)) {
        warn "Cannot read directory $path: $!\n";
        return;
    }
    # Sorted so the listing is deterministic from run to run.
    my @entries = sort grep { !/\A\./ } readdir($dh);
    closedir($dh);

    for my $entry (@entries) {
        my $file = $path . $entry;
        if (-d $file) {
            find_files($file, $output);
        }
        else {
            print {$output} $file . "\n"
                or die "Cannot write file list entry for $file: $!";
        }
    }
    return;
}
# Compare two file listings and record which entries are unique to the first
# one and which are present in both.
#
# Arguments:
#   $_[0]  listing whose unique entries are reported (one path per line)
#   $_[1]  listing to compare against
#   $_[2]  report file; an "Only in <source> folder" section is appended
#   $_[3]  first directory prefix (stem) to cut off the listed paths
#   $_[4]  second directory prefix (stem) to cut off the listed paths
#
# Side effects:
#   - appends the entries found only in $_[0] to the report file $_[2]
#   - appends the entries found in both listings to the file-level $file_list
#
# <source> in the heading is "scripts" or "gitrepo", taken from the name of
# $_[0]; when the name contains neither word the name itself is used.
sub list_non_present_files {
    my ($source_list, $reference_list, $file_diffs, $stem1, $stem2) = @_;

    # A stem is only ever removed from the START of a path, and at most one
    # stem is removed per path. The longer stem is tried first so that a stem
    # which is a prefix of the other one cannot eat part of the wrong path.
    # (Removing both stems anywhere in the string corrupted such paths.)
    my @stems = sort { length($b) <=> length($a) }
                grep { defined($_) && length($_) } ($stem1, $stem2);
    my $strip_stem = sub {
        my ($path) = @_;
        for my $stem (@stems) {
            return substr($path, length $stem) if index($path, $stem) == 0;
        }
        return $path;
    };

    # Relative names present in the reference listing.
    my %seen;
    open(my $reference, "<", $reference_list)
        or die "Cannot open file ".$reference_list." for reading: $!";
    while (my $line = <$reference>) {
        chomp $line;
        $seen{ $strip_stem->($line) }++;
    }
    close($reference) or die "Could not finish reading from ".$reference_list.": $!";

    # Heading label; fall back to the file name so it is never undefined.
    my ($source_name) = $source_list =~ /(scripts|gitrepo)/;
    $source_name = $source_list unless defined $source_name;

    my %in_both;
    my %only_in_source;
    open(my $source, "<", $source_list)
        or die "Cannot open file ".$source_list." for reading: $!";
    while (my $line = <$source>) {
        chomp $line;
        my $relative = $strip_stem->($line);
        next if $relative eq "";
        if ($seen{$relative}) {
            $in_both{$relative}++;
        }
        else {
            $only_in_source{$relative}++;
        }
    }
    close($source) or die "Could not finish reading from ".$source_list.": $!";

    # Sorted output keeps the reports stable and diffable between runs.
    open(my $diffs, '>>', $file_diffs) or die "Cannot open file ".$file_diffs." for writing: $!";
    print {$diffs} "Only in ".$source_name." folder: \n";
    print {$diffs} "$_\n" for sort keys %only_in_source;
    print {$diffs} "\n";
    close($diffs) or die "Could not finish writing to ".$file_diffs.": $!";

    open(my $flist, '>>', $file_list) or die "Cannot open file ".$file_list." for writing: $!";
    print {$flist} "$_\n" for sort keys %in_both;
    close($flist) or die "Could not finish writing to ".$file_list.": $!";
    return;
}
# Compare two files line by line and, when they differ, leave a gzip'ed diff
# report in the current directory.
#
# Arguments:
#   $_[0], $_[1]  paths of the two files to compare
#
# Returns the path of the larger of the two files (the one the comparison is
# driven from) when a difference was found, and "" when the files are equal
# or when either path is not a regular file.
#
# The report is written to a temporary file named after the larger file
# ('/' replaced by '-'). It is compressed with gzip the first time a given
# name shows a mismatch (tracked in the file-level %processed hash) and is
# removed in every other case.
sub compare_files {
    my ($file1, $file2) = @_;

    #skip anything that isn't a normal file
    return "" unless -f $file1;
    return "" unless -f $file2;

    # Drive the comparison from the larger file.
    my $filesize1 = -s $file1;
    my $filesize2 = -s $file2;
    if ($filesize2 > $filesize1) {
        ($file1, $file2) = ($file2, $file1);
    }

    my $fname = $file1;
    $fname =~ s/^\///;
    $fname =~ s/\//-/g;
    # tempfile() already returns a handle opened for writing, so the report
    # is written through it directly.
    my ($co, $current_output) = tempfile('./'.$fname.'XXXXXXXX', SUFFIX => '.tmp', UNLINK => 1);
    open(my $in1, "<", $file1) or die "Cannot open file ".$file1." for reading: $!";
    open(my $in2, "<", $file2) or die "Cannot open file ".$file2." for reading: $!";

    my $lineno = 0;
    my $is_mismatch = 0;
    # Writes one mismatching line pair, preceded by the header on first use.
    my $report = sub {
        my ($left, $right) = @_;
        if (!$is_mismatch) {
            print {$co} "Mismatch between files: \n".$file1."\n".$file2."\nPlease check:\n";
        }
        print {$co} "line :".$lineno."\n";
        print {$co} $left;
        print {$co} $right;
        $is_mismatch = 1;
    };

    while (defined(my $line1 = <$in1>)) {
        ++$lineno;
        my $line2 = <$in2>;
        if (!defined $line2) {
            # The other file has ended: always a difference, even when the
            # extra line is blank. It is still shown as an empty line.
            $report->($line1, "\n");
            next;
        }
        next if $line1 eq $line2;
        $report->($line1, $line2);
    }
    # The larger file can have fewer lines; report what is left of the other.
    while (defined(my $line2 = <$in2>)) {
        ++$lineno;
        $report->("\n", $line2);
    }

    close $in1 or die "Cannot close file: ".$file1.": $!";
    close $in2 or die "Cannot close file: ".$file2.": $!";
    # The report must be closed (flushed) BEFORE gzip reads it; otherwise the
    # buffered text is lost and the archive is empty or truncated.
    close $co or die "Cannot close file: ".$current_output.": $!";

    if ($is_mismatch && !exists $processed{$fname}) {
        # List form: no shell, so odd characters in the name are harmless.
        system('gzip', $current_output) == 0
            or warn "gzip failed for $current_output (exit status $?)\n";
        $processed{$fname}++;
    }
    else {
        # Report is not kept; remove it now rather than at program exit.
        unlink $current_output;
    }

    return $file1 if $is_mismatch;
    return "";
}
# Compare every file named in a listing between the two directory trees and
# append the names of the files that differ to a report.
#
# Arguments:
#   $_[0]  listing of relative file names, one per line
#   $_[1]  prefix (stem) of the first tree; prepended to each name
#   $_[2]  prefix (stem) of the second tree; prepended to each name
#   $_[3]  report file the changed names are appended to
#
# Names ending in .jar or .gz are skipped. A name that occurs more than once
# in the listing is compared only once.
sub read_list {
    my ($list, $stem1, $stem2, $changed_files) = @_;
    my %changed_file_list;
    my %already_compared;

    open(my $ll, '<', $list) or die "Cannot open file ".$list." for reading: $!";
    while (my $line = <$ll>) {
        chomp $line;
        next if $line eq "";
        next if $line =~ /\.jar$|\.gz$/;
        # The listing is appended to by both listing passes, so every shared
        # file normally appears twice; do not diff it twice.
        next if $already_compared{$line}++;

        my $current_file = compare_files($stem1.$line, $stem2.$line);
        # $line already is the relative name, so record it as-is instead of
        # cutting the stems back out of the returned path.
        $changed_file_list{$line}++ if $current_file ne "";
    }
    close $ll or die "Cannot close file: ".$list.": $!";

    open(my $chflist, '>>', $changed_files) or die "Cannot open file ".$changed_files." for writing: $!";
    print {$chflist} "The following files have differences between git repo and deployment: \n";
    # Sorted so the report is stable between runs.
    print {$chflist} "$_\n" for sort keys %changed_file_list;
    print {$chflist} "\n";
    close($chflist) or die "Cannot close file: ".$changed_files.": $!";
    return;
}
# Step 1: list every file below the git repo and below the deployment
# directory into the two scratch listings.
find_files($git_locations, $git, @git_found_files);
find_files($script_locations, $scripts, @scripts_found_files);

# The listings are read back by file name in the next step, so anything still
# sitting in the write buffers has to be flushed first; closing does that.
for my $listing ($git, $scripts) {
    next unless $listing->opened();
    close($listing) or die "Cannot close file: $!";
}

# Step 2: report the files that exist on one side only (once per direction).
# Both calls also append the files present on both sides to $file_list.
list_non_present_files($scripts_files, $git_files, $file_diffs, $stem1, $stem2);
list_non_present_files($git_files, $scripts_files, $file_diffs, $stem1, $stem2);

# Step 3: for the files present on both sides, compare the contents and
# report the ones that differ.
read_list($file_list, $stem1, $stem2, $changed_files);