add two awk scripts - scraps - random scraps and notes that are useful to me

commit 1d03a8603ef8ea30bc2c529a6e08d6985746e6bd
parent 0434ffdf59daea73b55040fe5bda70caa8194545
Author: root <root>
Date:   Mon, 21 Apr 2025 00:00:57 +0200

add two awk scripts

Diffstat:
A awk/group_by_column.awk  | 36 ++++++++++++++++++++++++++++++++++++
A awk/reorganize_groups.awk  | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
R php/call_private_function_outside_class.php -> php/snippets/call_private_function_outside_class.php  | 0

3 files changed, 101 insertions(+), 0 deletions(-)
diff --git a/awk/group_by_column.awk b/awk/group_by_column.awk
@@ -0,0 +1,36 @@
+#!/bin/awk -f
+
+#backstory: data format from dataprovider came in a weird way, made it necessary to compress
+#some rows before adding them to the database, to reduce table sizes and boost JOIN speeds
+#this part compressed rows to a fewer number which cut a table by ~60% which resulted in a decent boost on the DB side
+#also see: reorganize_groups.awk
+
+BEGIN {
+    FS = OFS = ",";
+}
+{
+    # $3 is the grouping key; each record contributes one "$2@$1" entry
+    entry = $2 "@" $1;
+    if ($3 in grouped) {
+        # Key already present: extend its comma-joined list
+        grouped[$3] = grouped[$3] "," entry;
+    } else {
+        # First record for this key starts the list
+        grouped[$3] = entry;
+    }
+}
+END {
+    # One output line per key: key, then its entries (for-in order is unspecified)
+    for (key in grouped)
+        print key, grouped[key];
+}
+
+#IN:
+#foo,123,groupA
+#bar,456,groupA
+#baz,789,groupB
+
+#OUT:
+#groupA,123@foo,456@bar
+#groupB,789@baz
+
diff --git a/awk/reorganize_groups.awk b/awk/reorganize_groups.awk
@@ -0,0 +1,65 @@
+#!/bin/awk -f
+
+#backstory: data format from dataprovider came in a weird way, made it necessary to compress
+#some rows before adding them to the database, to reduce table sizes and boost JOIN speeds
+#this part expands the compressed rows back to their original number of rows
+#not the nicest looking code, but it did cut a table by ~60% which resulted in a decent boost on the DB side
+#also see group_by_column.awk
+
+# Append value to a 0-based array (indices 0..length(arr)-1 are assumed to be in use)
+function push(arr, value) {
+    arr[length(arr)] = value;  # 0-based: the next free slot is length(arr), not length(arr)+1
+}
+
+# Print arr[0], arr[1], ... on one line, each followed by ";" (i is a local variable).
+function parray(arr,    i) {
+    # length(arr) is re-read on every pass; the constant "%s;" format keeps '%' in data literal
+    for (i = 0; i < length(arr); i++)
+        printf "%s;", arr[i];
+    printf "\n";
+}
+
+# Input records are newline-terminated lines of comma-separated fields
+BEGIN {
+    FS = ",";
+    RS = "\n";
+}
+# Per record: $1 is the key; every later field is split around "@" and
+# consecutive fields sharing the same prefix are emitted together on one line.
+{
+    key = $1;
+
+    for (col = 2; col <= NF; col++) {
+        prev = grp[0];                    # prefix seen in the previous field
+        match($col, /[^@]+/, grp);        # grp[0]: first run of non-@ characters
+        match($col, /@[0-9]+/, num);      # num[0]: "@" followed by digits
+        value = substr(num[0], 2);        # drop the leading "@"
+
+        # The first data field of a record has no predecessor to compare with
+        if (col == 2)
+            prev = grp[0];
+
+        if (prev == grp[0]) {
+            # Still in the same run: (re)write the two header slots
+            arr[0] = key;
+            arr[1] = prev;
+        } else {
+            # Prefix changed: emit the finished run, then start a new one
+            parray(arr);
+            split("", arr);
+            arr[0] = key;
+            arr[1] = grp[0];
+        }
+        push(arr, value);                 # both paths end by appending the value
+
+        # Last field of the record: emit whatever is pending and clear it
+        if (col == NF) {
+            parray(arr);
+            split("", arr);
+        }
+    }
+}
+END {
+    # Trailing blank line after all output
+    printf "\n";
+}
diff --git a/php/call_private_function_outside_class.php b/php/snippets/call_private_function_outside_class.php

A	awk/group_by_column.awk	\|	36	++++++++++++++++++++++++++++++++++++
A	awk/reorganize_groups.awk	\|	65	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
R	php/call_private_function_outside_class.php -> php/snippets/call_private_function_outside_class.php	\|	0

Back (Current repo: scraps)