commit 6ce27eaf5bb2fa96ea5553a41d087518e4dac747
parent 8b90b1d85fb937327964a0e9f1de2b495d3dbd90
Author: root <root>
Date: Tue, 22 Apr 2025 20:33:26 +0200
add git consistency check perl script
Diffstat:
1 file changed, 231 insertions(+), 0 deletions(-)
diff --git a/perl/git_consistency_check.pl b/perl/git_consistency_check.pl
@@ -0,0 +1,231 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+use File::Temp qw/ tempfile /;
+use Fcntl ':flock';
+
+# Single-instance guard: take a non-blocking exclusive lock on this script's own file.
+# $self is file-scoped, so the handle (and with it the lock) lives until the process exits.
+open(my $self, '<', $0) or die "Couldn't open self: $!";
+flock($self, LOCK_EX | LOCK_NB) or die "This script is already running! Aborting!";
+
+#EXPLAINER:
+# Compare a Git repository (source of truth) to a deployed directory.
+#
+# Backstory:
+# This was created because it was common for folks to apply hotfixes directly in production,
+# which were then overwritten by git deployments, causing production incidents.
+# This script was a necessary first step to clean up before being able to enforce
+# a strict "deploy via git only" rule.
+#
+# Recursively checks for:
+# - Files that exist in one location but not the other
+# - Files whose contents differ (compared line by line)
+#
+# Useful for:
+# - Verifying that a deployment matches the committed source
+# - Catching out-of-band edits on production servers
+# - Validating integrity of scripts, configs, and assets
+#
+# Originally written in bash, but it got too unreadable for the job.
+# Rewritten in Perl for better sanity and maintainability.
+#
+# !! Not intended for projects with compiled binaries (files are compared as text lines, so mismatches are expected)
+#USAGE: perl <THIS SCRIPT> <GIT REPO PATH> <DEPLOYMENT PATH>
+
+# Both directories are mandatory; bail out early with a usage hint otherwise.
+die "Usage: $0 DIRECTORY1 DIRECTORY2\n" if scalar(@ARGV) < 2;
+
+my $git_locations = $ARGV[0];
+my $script_locations = $ARGV[1];
+
+for my $location ($git_locations, $script_locations) {
+    # A mistyped directory would otherwise be scanned as "empty" and every file
+    # of the other tree would be reported as missing.
+    die "Not a directory: $location\n" unless -d $location;
+
+    # Drop trailing slashes (a bare "/" is left alone) so both stems have the same
+    # shape. The stems are stripped from the listed paths later on; with "/repo/"
+    # on one side and "/srv/app" on the other the remaining relative names would be
+    # "a.txt" versus "/a.txt" and nothing would ever match.
+    $location =~ s{(?<=[^/])/+\z}{};
+}
+
+# Not filled by anything in this script; only passed through to find_files().
+my @git_found_files;
+my @scripts_found_files;
+
+# Output files, all written relative to the current working directory.
+my $file_diffs="./file_diffs.txt";
+my $file_list="./file_list.txt";
+my $changed_files="./changed_files.txt";
+
+# Report names that compare_files() has already compressed.
+my %processed;
+
+# file_list.txt is an intermediate work list that is only ever opened in append
+# mode; empty it up front so entries left over from an earlier run are not
+# compared again. The two report files are deliberately left untouched here.
+open(my $reset, '>', $file_list) or die "Cannot open file ".$file_list." for writing: $!";
+close($reset) or die "Could not finish writing to ".$file_list.": $!";
+
+my $stem1 = $git_locations;
+my $stem2 = $script_locations;
+
+# Listing files for both trees. The "gitrepo"/"scripts" part of the name is what
+# list_non_present_files() uses as the label in its report, so keep it.
+# UNLINK => 1 removes the listings at exit instead of piling them up in /tmp.
+my ($git, $git_files) = tempfile('/tmp/gitrepo-git-consistency-check-XXXXXXXXXXXX', SUFFIX => '.tmp', UNLINK => 1);
+my ($scripts, $scripts_files) = tempfile('/tmp/scripts-git-consistency-check-XXXXXXXXXXXX', SUFFIX => '.tmp', UNLINK => 1);
+
+#recursively search through target directory for all files
+#
+# Arguments:
+#   $_[0]  directory to scan (a trailing "/" is added when missing)
+#   $_[1]  open, writable filehandle; every non-directory entry is printed to it,
+#          one full path per line
+#   Any further arguments are accepted for backward compatibility and ignored.
+#
+# Entries whose name starts with "." are skipped, as the former glob('*') scan
+# did; this also keeps a .git directory out of the listing.
+# A directory that cannot be read produces a warning and is skipped.
+sub find_files {
+    my ($path, $output) = @_;
+
+    $path .= '/' if($path !~ /\/$/);
+
+    # readdir instead of glob(): glob splits its pattern on whitespace and expands
+    # metacharacters, so a directory such as "my dir" was scanned incorrectly.
+    my $dh;
+    unless (opendir($dh, $path)) {
+        warn "Cannot read directory ".$path.": $!\n";
+        return;
+    }
+    # Sorted so the listing order is deterministic.
+    my @entries = sort grep { !/^\./ } readdir($dh);
+    closedir($dh);
+
+    for my $entry (@entries) {
+        my $file = $path . $entry;
+        if(-d $file) {
+            find_files($file, $output);
+        } else {
+            print $output $file."\n";
+        }
+    }
+    return;
+}
+
+#take two lists of files, remove the stems of the path, and compare differences between the lists
+#
+# Arguments:
+#   $_[0]         listing file (one path per line) whose entries are checked
+#   $_[1]         listing file to check them against
+#   $_[2]         report file; entries found only in $_[0] are appended to it
+#   $_[3], $_[4]  directory stems removed from the front of every listed path
+#
+# Entries present in both listings are appended to the file-level $file_list.
+# The label used in the report ("scripts" or "gitrepo") is taken from the name
+# of $_[0]; the file name itself is used when neither word occurs in it.
+# Dies when one of the files cannot be opened or closed.
+sub list_non_present_files {
+    my ($checked_list, $reference_list, $file_diffs, $stem1, $stem2) = @_;
+
+    # Strip exactly one stem, and only where it really is the prefix of the path.
+    # The former unanchored "strip stem1, then strip stem2" could also eat a part
+    # of the relative path that happened to look like the other stem.
+    # The longer stem is tried first so that a stem which is itself a prefix of
+    # the other one cannot shadow it.
+    my ($longer, $shorter) = sort { length($b) <=> length($a) } ($stem1, $stem2);
+    my $stem_re = qr/\A(?:\Q$longer\E|\Q$shorter\E)/;
+
+    open (my $reference,"<",$reference_list) or die "Cannot open file ".$reference_list." for reading: $!";
+    my %seen;
+    while (my $line = <$reference>) {
+        chomp ($line);
+        $line =~ s/$stem_re//;
+        $seen{$line}++;
+    }
+    close ($reference) or die "Could not finish reading from ".$reference_list.": $!";
+
+    my $source_name = $checked_list =~ /(scripts|gitrepo)/ ? $1 : $checked_list;
+
+    my %fl;    # relative names present in both listings
+    my %df;    # relative names present only in the checked listing
+
+    open (my $checked,"<",$checked_list) or die "Cannot open file ".$checked_list." for reading: $!";
+    while (my $line = <$checked>) {
+        chomp $line;
+        $line =~ s/$stem_re//;
+        next if $line eq "";    # a blank entry names no file
+        if($seen{$line}) {
+            $fl{$line}++;
+        } else {
+            $df{$line}++;
+        }
+    }
+    close ($checked) or die "Could not finish reading from ".$checked_list.": $!";
+
+    # Both outputs are sorted so repeated runs give comparable reports.
+    open(my $diffs, '>>', $file_diffs) or die "Cannot open file ".$file_diffs." for writing: $!";
+    print $diffs "Only in ".$source_name." folder: \n";
+    print $diffs "$_\n" for sort keys %df;
+    print $diffs "\n";
+    close ($diffs) or die "Could not finish writing to ".$file_diffs.": $!";
+
+    open(my $flist, '>>', $file_list) or die "Cannot open file ".$file_list." for writing: $!";
+    print $flist "$_\n" for sort keys %fl;
+    close ($flist) or die "Could not finish writing to ".$file_list.": $!";
+
+    return;
+}
+
+# Compare two files line by line and write a report of the differing lines.
+#
+# Arguments:
+#   $_[0], $_[1]  paths of the two files to compare
+#
+# Returns the path of the larger of the two files when they differ, and ""
+# when they are identical or when either one is not a regular file.
+#
+# The report goes to a temp file in the current directory whose name is derived
+# from the larger file's path. On a mismatch it is compressed with gzip, once
+# per derived name (tracked in the file-level %processed). Report files that
+# are not compressed are created with UNLINK => 1, i.e. cleaned up at exit.
+# Dies when a file cannot be opened or closed; a failing gzip only warns.
+sub compare_files {
+    my ($file1, $file2) = @_;
+
+    #skip anything that isn't a normal file
+    return "" unless -f $file1;
+    return "" unless -f $file2;
+
+    # Keep the bigger file as $file1: the report name and the return value are
+    # based on it. The comparison itself no longer depends on this order.
+    if((-s $file2 || 0) > (-s $file1 || 0)) {
+        ($file1, $file2) = ($file2, $file1);
+    }
+
+    # Flatten the path into a single file name: "/a/b/c" becomes "a-b-c".
+    my $fname = $file1;
+    $fname =~ s/^\///;
+    $fname =~ s/\//-/g;
+
+    # tempfile() already returns an open, writable handle; no second open needed.
+    my ($co, $current_output) = tempfile('./'.$fname.'XXXXXXXX', SUFFIX => '.tmp', UNLINK => 1);
+
+    open(my $in1,"<",$file1) or die "Cannot open file ".$file1." for reading: $!";
+    open(my $in2,"<",$file2) or die "Cannot open file ".$file2." for reading: $!";
+
+    my $lineno = 0;
+    my $is_mismatch = 0;
+
+    # Read both files in lock step until BOTH are exhausted. A line that is
+    # missing on one side is a difference, even when the other side's line is
+    # blank, and surplus lines are caught whichever file has more of them.
+    while (1) {
+        my $line1 = <$in1>;
+        my $line2 = <$in2>;
+        last unless defined $line1 or defined $line2;
+        ++$lineno;
+        next if defined $line1 and defined $line2 and $line1 eq $line2;
+
+        if(!$is_mismatch) {
+            print $co "Mismatch between files: \n".$file1."\n".$file2."\nPlease check:\n";
+            $is_mismatch = 1;
+        }
+        print $co "line :".$lineno."\n";
+        for my $side ($line1, $line2) {
+            my $text = defined $side ? $side : "<end of file>";
+            # a last line without newline must not run into the next report line
+            $text .= "\n" unless $text =~ /\n\z/;
+            print $co $text;
+        }
+    }
+
+    close $in1 or die "Cannot close file: ".$file1.": $!";
+    close $in2 or die "Cannot close file: ".$file2.": $!";
+
+    # Close (and thereby flush) the report BEFORE gzip reads it; compressing
+    # while the handle was still open could pack an empty or truncated report.
+    close $co or die "Cannot close file: ".$current_output.": $!";
+
+    if ($is_mismatch == 1 && !exists $processed{$fname}) {
+        # List form: no shell is involved, so spaces or shell metacharacters in
+        # the derived file name cannot break or alter the command.
+        system('gzip', $current_output) == 0
+            or warn "gzip failed for ".$current_output." (status $?)\n";
+        $processed{$fname}++;
+    }
+
+    return $file1 if $is_mismatch == 1;
+    return "";
+}
+
+# Compare every file named in a list between the two directory trees and
+# append the names of the files that differ to a report.
+#
+# Arguments:
+#   $_[0]         list file with one relative path per line
+#   $_[1], $_[2]  directory stems; each is prepended to the relative path to
+#                 get the two files handed to compare_files()
+#   $_[3]         report file (opened in append mode)
+#
+# Names ending in .jar or .gz are not compared.
+# Dies when the list or the report cannot be opened or closed.
+sub read_list {
+    my ($list, $stem1, $stem2, $changed_files) = @_;
+    my %changed_file_list;
+    my %already_compared;
+
+    open(my $ll,'<',$list) or die "Cannot open file ".$list." for reading: $!";
+    while (my $line = <$ll>) {
+        chomp $line;
+        next if $line eq "";
+        next if $line =~ /\.jar$|\.gz$/;
+
+        # The list is appended to once per comparison direction, so every common
+        # file shows up (at least) twice; compare each name only once.
+        next if $already_compared{$line}++;
+
+        # compare_files() returns a full path on mismatch. Record the relative
+        # name we already hold instead of regex-stripping the stems from that
+        # path, which could also remove a look-alike part of the relative name.
+        if (compare_files($stem1.$line, $stem2.$line) ne "") {
+            $changed_file_list{$line}++;
+        }
+    }
+
+    close $ll or die "Cannot close file: ".$list.": $!";
+
+    open(my $chflist, '>>', $changed_files) or die "Cannot open file ".$changed_files." for writing: $!";
+    print $chflist "The following files have differences between git repo and deployment: \n";
+    # sorted so repeated runs give comparable reports
+    print $chflist "$_\n" for sort keys %changed_file_list;
+    print $chflist "\n";
+    close ($chflist) or die "Cannot close file: ".$changed_files.": $!";
+
+    return;
+}
+
+# Step 1: write the full file listing of the git repo and of the deployment
+# directory into their respective listing files.
+find_files($git_locations, $git, @git_found_files);
+find_files($script_locations, $scripts, @scripts_found_files);
+
+# Step 2: close both listing handles if they are still open. The listings are
+# re-read by file name below, and an open handle may still hold buffered lines.
+for my $listing ($git, $scripts) {
+    next unless $listing->opened() == 1;
+    close $listing or die "Cannot close file: $!";
+}
+
+# Step 3: report the files that exist in only one of the two trees, first from
+# the deployment's point of view, then from the git repo's.
+for my $direction ([$scripts_files, $git_files], [$git_files, $scripts_files]) {
+    list_non_present_files(@$direction, $file_diffs, $stem1, $stem2);
+}
+
+# Step 4: for the files present in BOTH trees, compare their contents.
+read_list($file_list, $stem1, $stem2, $changed_files);
+
+