Back (Current repo: scraps)

random scraps and notes that are useful to me
To clone this repository:
git clone https://git.viktor1993.net/scraps.git
Log | Download | Files | Refs

commit 6ce27eaf5bb2fa96ea5553a41d087518e4dac747
parent 8b90b1d85fb937327964a0e9f1de2b495d3dbd90
Author: root <root>
Date:   Tue, 22 Apr 2025 20:33:26 +0200

add git consistency check perl script

Diffstat:
A perl/git_consistency_check.pl | 231 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 231 insertions(+), 0 deletions(-)

#!/usr/bin/env perl
# perl/git_consistency_check.pl

use strict;
use warnings;
use File::Temp qw/ tempfile /;
use Fcntl ':flock';

# Single-instance guard: take an exclusive, non-blocking lock on this script's
# own file so that two runs cannot interleave writes to the report files.
open my $self, '<', $0 or die "Couldn't open self: $!";
flock $self, LOCK_EX | LOCK_NB or die "This script is already running! Aborting!";

#EXPLAINER:
# Compare a Git repository (source of truth) to a deployed directory.
#
# Backstory:
# This was created because it was common for folk to add hotfixes in production directly
# which then got overwritten with git deployments causing production incidents
# this script was a first stage necessity in order to clean up, before being able to enforce
# a strict deployment via git only rule.
#
# Recursively checks for:
# - Files that exist in one location but not the other
# - Files whose content differs (line-by-line comparison)
#
# Useful for:
# - Verifying that a deployment matches the committed source
# - Catching out-of-band edits on production servers
# - Validating integrity of scripts, configs, and assets
#
# Originally written in bash, but it got too unreadable for the job.
# Rewritten in Perl for better sanity and maintainability.
#
# !! Not intended for projects with compiled binaries (mismatches are expected)
#
# Output (all written to the current working directory, rewritten on every run):
# - file_diffs.txt     files present in only one of the two trees
# - file_list.txt      work list of files present in both trees
# - changed_files.txt  files present in both trees whose content differs
# - <path>XXXXXXXX.tmp.gz  one gzipped line-level report per differing file
#
#USAGE perl <THIS SCRIPT> <GIT REPO PATH> <DEPLOYMENT PATH>

die "Usage: $0 DIRECTORY1 DIRECTORY2\n" if scalar(@ARGV) < 2;

my $git_locations    = $ARGV[0];
my $script_locations = $ARGV[1];

# A missing directory would otherwise look like an empty tree and flag every
# file on the other side as "only in ..." - fail loudly instead.
for my $dir ($git_locations, $script_locations) {
    die "Not a directory: $dir\n" unless -d $dir;
}

my $file_diffs    = "./file_diffs.txt";
my $file_list     = "./file_list.txt";
my $changed_files = "./changed_files.txt";
my %processed;    # report names already written by compare_files

# Normalise the stems so that "dir" and "dir/" produce the same relative paths.
my $stem1 = $git_locations;
my $stem2 = $script_locations;
s{/+\z}{} for $stem1, $stem2;

# The "gitrepo" / "scripts" part of the template is what list_non_present_files
# uses to label its report sections. The listings are scratch files, so let
# File::Temp remove them at exit.
my ($git, $git_files) = tempfile('/tmp/gitrepo-git-consistency-check-XXXXXXXXXXXX', SUFFIX => '.tmp', UNLINK => 1);
my ($scripts, $scripts_files) = tempfile('/tmp/scripts-git-consistency-check-XXXXXXXXXXXX', SUFFIX => '.tmp', UNLINK => 1);

# Remove a leading directory stem from $path.
# The longest stem that is a true directory prefix of $path wins; the result
# keeps its leading "/". A path that starts with none of the stems is returned
# unchanged.
sub _strip_stem {
    my ($path, @stems) = @_;

    for my $stem (sort { length $b <=> length $a } @stems) {
        my $prefix = $stem;
        $prefix =~ s{/+\z}{};
        return substr($path, length $prefix) if index($path, $prefix . '/') == 0;
    }

    return $path;
}

# Render one side of a mismatching line for the report; undef means that the
# file had already ended at this line number.
sub _report_line {
    my ($line) = @_;

    return "<no such line>\n" unless defined $line;
    return $line if $line =~ /\n\z/;
    return $line . "  <no newline at end of file>\n";
}

#recursively search through target directory for all files
# $_[0]: directory to walk
# $_[1]: open filehandle that receives one path per line
# Any further arguments are accepted and ignored (kept for old call sites).
# Hidden entries (leading ".") are skipped, which also keeps .git out of the
# listing. Directories are read with readdir so that spaces or glob
# metacharacters in names cannot split or expand the path.
sub find_files {
    my ($path, $output) = @_;

    $path .= '/' unless $path =~ m{/\z};

    opendir(my $dh, $path) or do {
        warn "Cannot read directory $path: $!\n";
        return;
    };
    my @entries = sort grep { !/\A\./ } readdir $dh;
    closedir $dh;

    for my $entry (@entries) {
        my $file = $path . $entry;
        if (-d $file) {
            find_files($file, $output);
        } else {
            print {$output} $file . "\n";
        }
    }

    return;
}

#take two lists of files, remove the stems of the path, and compare differences between the lists
# $_[0]: listing whose unique entries are reported ("Only in ... folder")
# $_[1]: listing to compare against
# $_[2]: report file, appended to
# $_[3], $_[4]: the two directory stems
# Entries present in both listings are appended to the file-level $file_list.
sub list_non_present_files {
    my ($list_a, $list_b, $file_diffs, $stem1, $stem2) = @_;

    my %seen;
    open(my $other, '<', $list_b) or die "Cannot open file ".$list_b." for reading: $!";
    while (my $line = <$other>) {
        chomp $line;
        $seen{ _strip_stem($line, $stem1, $stem2) }++;
    }
    close($other) or die "Could not finish reading from ".$list_b.": $!";

    # The label comes from the temp file name; fall back to the whole name
    # rather than printing an undefined value.
    my $source_name = $list_a =~ /(scripts|gitrepo)/ ? $1 : $list_a;

    my %fl;    # present in both listings
    my %df;    # present only in $list_a
    open(my $mine, '<', $list_a) or die "Cannot open file ".$list_a." for reading: $!";
    while (my $line = <$mine>) {
        chomp $line;
        my $relative = _strip_stem($line, $stem1, $stem2);
        if ($seen{$relative}) {
            $fl{$relative}++;
        } elsif ($relative ne "") {
            $df{$relative}++;
        }
    }
    close($mine) or die "Could not finish reading from ".$list_a.": $!";

    open(my $diffs, '>>', $file_diffs) or die "Cannot open file ".$file_diffs." for writing: $!";
    print {$diffs} "Only in ".$source_name." folder: \n";
    print {$diffs} "$_\n" for sort keys %df;
    print {$diffs} "\n";
    close($diffs) or die "Could not finish writing to ".$file_diffs.": $!";

    open(my $flist, '>>', $file_list) or die "Cannot open file ".$file_list." for writing: $!";
    print {$flist} "$_\n" for sort keys %fl;
    close($flist) or die "Could not finish writing to ".$file_list.": $!";

    return;
}

# Compare two files line by line.
# Returns "" when either path is not a regular file or when the contents are
# identical; otherwise returns the path of the larger file (the first argument
# on a size tie).
# On the first mismatch a report named after that path is created in the
# current directory and gzipped once the comparison finishes. Both files are
# read to the end, so lines that exist in only one of them are reported too.
sub compare_files {
    my ($file1, $file2) = @_;

    #skip anything that isn't a normal file
    return "" unless -f $file1;
    return "" unless -f $file2;

    # Larger file first: this decides the report name and the return value.
    my $filesize1 = (-s $file1) || 0;
    my $filesize2 = (-s $file2) || 0;
    ($file1, $file2) = ($file2, $file1) if $filesize2 > $filesize1;

    my $fname = $file1;
    $fname =~ s{^/}{};
    $fname =~ s{/}{-}g;

    open(my $in1, '<', $file1) or die "Cannot open file ".$file1." for reading: $!";
    open(my $in2, '<', $file2) or die "Cannot open file ".$file2." for reading: $!";

    my $write_report = !exists $processed{$fname};
    my ($co, $current_output);
    my $lineno      = 0;
    my $is_mismatch = 0;

    while (1) {
        my $line1 = <$in1>;
        my $line2 = <$in2>;
        last unless defined $line1 || defined $line2;
        ++$lineno;
        next if defined $line1 && defined $line2 && $line1 eq $line2;

        if (!$is_mismatch) {
            $is_mismatch = 1;
            # Already reported earlier in this run: the verdict is enough.
            last unless $write_report;
            ($co, $current_output) = tempfile('./'.$fname.'XXXXXXXX', SUFFIX => '.tmp', UNLINK => 0);
            print {$co} "Mismatch between files: \n".$file1."\n".$file2."\nPlease check:\n";
        }
        print {$co} "line :".$lineno."\n";
        print {$co} _report_line($line1);
        print {$co} _report_line($line2);
    }

    close $in1 or die "Cannot close file: ".$file1.": $!";
    close $in2 or die "Cannot close file: ".$file2.": $!";

    if (defined $co) {
        # Close before compressing so gzip sees the complete report, and use
        # the list form of system so the file name never passes through a shell.
        close $co or die "Cannot close file: ".$current_output.": $!";
        system('gzip', $current_output) == 0
            or warn "gzip failed for ".$current_output." (status $?)\n";
        $processed{$fname}++;
    }

    return $is_mismatch ? $file1 : "";
}

# Compare every file named in $list between the two trees.
# $_[0]: file with one stem-relative path per line
# $_[1], $_[2]: the two directory stems
# $_[3]: report file, appended to
# Paths ending in .jar or .gz are skipped, and each path is compared once
# even if it is listed several times.
sub read_list {
    my ($list, $stem1, $stem2, $changed_files) = @_;
    s{/+\z}{} for $stem1, $stem2;

    my %changed_file_list;
    my %seen;

    open(my $ll, '<', $list) or die "Cannot open file ".$list." for reading: $!";
    while (my $line = <$ll>) {
        chomp $line;
        next if $line =~ /\.jar$|\.gz$/;
        next if $seen{$line}++;

        my $current_file = compare_files($stem1.$line, $stem2.$line);
        $changed_file_list{$line}++ if $current_file ne "";
    }
    close $ll or die "Cannot close file: ".$list.": $!";

    open(my $chflist, '>>', $changed_files) or die "Cannot open file ".$changed_files." for writing: $!";
    print {$chflist} "The following files have differences between git repo and deployment: \n";
    print {$chflist} "$_\n" for sort keys %changed_file_list;
    print {$chflist} "\n";
    close($chflist) or die "Cannot close file: ".$changed_files.": $!";

    return;
}

# Start every run with empty outputs: they are appended to further down, and
# leftovers from an earlier run would be re-read (file_list) or mistaken for
# current findings (the two reports).
for my $report ($file_diffs, $file_list, $changed_files) {
    open(my $fh, '>', $report) or die "Cannot open file ".$report." for writing: $!";
    close($fh) or die "Cannot close file: ".$report.": $!";
}

#locate all the files in the git repo and in the deployment directory
find_files($git_locations, $git);
find_files($script_locations, $scripts);

# Close the listings so that everything is flushed before they are read back.
close $git or die "Cannot close file: $!";
close $scripts or die "Cannot close file: $!";

#Figure out which files exist only in one or the other dir structure, and print the list of differences
list_non_present_files($scripts_files, $git_files, $file_diffs, $stem1, $stem2);
list_non_present_files($git_files, $scripts_files, $file_diffs, $stem1, $stem2);

#then for the files that exist in BOTH directory structures alike, analyze the differences between all the files
read_list($file_list, $stem1, $stem2, $changed_files);