commit 185a19eaa91f679683a4caaf454ef0149a7e9604
parent e31a49cb21fa221a24f2a014426e80001aa4676b
Author: root <root>
Date: Thu, 24 Apr 2025 10:29:53 +0200
new awk script
Diffstat:
1 file changed, 39 insertions(+), 0 deletions(-)
diff --git a/awk/group_by_column_regex.awk b/awk/group_by_column_regex.awk
@@ -0,0 +1,39 @@
+#!/bin/awk -f
+
+# Append value to arr by storing it one index past the current element
+# count, i.e. at arr[length(arr)+1].
+#   arr   - array to extend (modified in place)
+#   value - element to store
+function push(arr, value) {
+ arr[1 + length(arr)] = value
+}
+
+# Print the elements of arr on one line, each followed by ";", then a
+# newline.
+#   arr  - array indexed 1..length(arr), as filled by push()
+#   i, n - local variables (awk idiom: extra parameters); callers pass
+#          only arr
+function parray(arr,    i, n) {
+ # Take the count once and iterate 1..n: reading a missing element such as
+ # arr[0] would create it, printing a stray leading ";" and growing arr.
+ n = length(arr);
+ for (i = 1; i <= n; i++) {
+  # Fixed format string so a "%" inside the data is printed verbatim.
+  printf "%s;", arr[i];
+ }
+ printf "\n";
+}
+# Initialisation, run once before any input is read.
+BEGIN {
+ # Start with an empty group buffer and an empty match pattern.
+ split("", a);
+ regex = "";
+ # prefix is compared against length($1) by the main rule; it defaults to
+ # 2 when it has not been set beforehand (e.g. with -v prefix=N).
+ if (prefix == "") {
+  prefix = 2;
+ }
+ # Comma-separated fields on both input and output.
+ FS = OFS = ",";
+}
+# Main rule, run for every input record.
+# A row whose first field is exactly `prefix` characters long starts a new
+# group; following rows whose first field begins with that same value are
+# added to it. E.g. with prefix 2 the row "50" opens a group that collects
+# "501", "5010", etc., because the file contains rows as a hierarchy.
+# Each collected entry is stored as "$1@$2". Rows that match neither case
+# are ignored.
+{
+ entry = $1 "@" $2;
+ if (length($1) == prefix) {
+  # Flush the previous group, if any, before starting the next one.
+  if (length(a) > 0) {
+   parray(a);
+  }
+  split("", a);
+  group_key = $1;
+  push(a, entry);
+ } else if (group_key == "" || index($1, group_key) == 1) {
+  # Literal prefix comparison instead of a regex built from the data, so
+  # a key containing regex metacharacters (".", "+", ...) cannot match
+  # unrelated rows. While no group has been opened yet (group_key is
+  # empty) every row is collected, as an empty pattern would match all.
+  push(a, entry);
+ }
+}
+# Flush the last group once all input has been read. Nothing is printed
+# when no entry was collected, matching the guard used by the main rule,
+# so empty input does not produce a blank output line.
+END {
+ if (length(a) > 0) {
+  parray(a);
+ }
+}