reorganize_groups.awk - scraps - random scraps and notes that are useful to me

reorganize_groups.awk (2226B)
#!/bin/awk

#backstory: data format from dataprovider came in a weird way, made it necessary to compress
#some rows before adding them to the database, to reduce table sizes and boost JOIN speeds
#this part expands the compressed rows back to their original number of rows
#not the nicest looking code, but it did cut a table by ~60% which resulted in a decent boost on the DB side
#also see group_by_column.awk

# Define helper function to push a value to an array
function push(arr, value) {
    arr[length(arr)+1] = value;
}

# Helper to print the array in semicolon-delimited format
function parray(arr) {
    for (i = 0; i < length(arr); i++) {
        printf arr[i] ";";
    }
    printf "\n";
}

# Field delimiter = comma, Record delimiter = newline
BEGIN {
    FS = ",";
    RS = "\n";
}
{
    t = $1;  # First column is the key

    for (x = 2; x <= NF; x++) {  # Loop over remaining columns
        z = y[0];  # Store previous group prefix for comparison
        match($x, /[^@]+/, y);   # Extract prefix before @ (the group)
        match($x, /@[0-9]+/, w); # Extract value after @
        w_ = substr(w[0], 2);    # Remove '@' prefix

        if (x == 2) {
            z = y[0];  # On first loop, set z to current group
        }

        # If same group, keep appending
        if (z == y[0]) {
            arr[0] = t;     # First element: top-level identifier
            arr[1] = z;     # Second: group
            push(arr, w_);  # Then all group values
        }

        # Group changes: flush and reset
        if (z != y[0]) {
            parray(arr);
            split("", arr);           # Clear array
            arr[0] = t; arr[1] = y[0];
            push(arr, w_);
        }

        # Final column — always flush and reset
        if (x == NF) {
            parray(arr);
            split("", arr);
            next;
        }
    }
}
END {
    printf "\n";  # Clean newline at end
}


#HOW IT WORKS
#cat file1
#dsfs,2@44,2@55,3@66,3@88,3@89,3@90,3@91,4@56,4@777,4@65,5@776,5@545
#afsf,1@00,1@99
#bdsa,2@323,2@212,6@432,6@2188

#awk -f reorganize_groups.awk <FILE>
#dsfs;2;;44;55;
#dsfs;3;;66;88;89;90;91;
#dsfs;4;;56;777;65;
#dsfs;5;;776;545;
#afsf;1;;00;99;
#bdsa;2;;323;212;
#bdsa;6;;432;2188;
Back (Current repo: scraps)