reorganize_groups.awk (2226B)
#!/bin/awk #backstory: data format from dataprovider came in a weird way, made it necessary to compress #some rows before adding them to the database, to reduce table sizes and boost JOIN speeds #this part expands the compressed rows back to their original number of rows #not the nicest looking code, but it did cut a table by ~60% which resulted in a decent boost on the DB side #also see group_by_column.awk # Define helper function to push a value to an array function push(arr, value) { arr[length(arr)+1] = value; } # Helper to print the array in semicolon-delimited format function parray(arr) { for (i = 0; i < length(arr); i++) { printf arr[i] ";"; } printf "\n"; } # Field delimiter = comma, Record delimiter = newline BEGIN { FS = ","; RS = "\n"; } { t = $1; # First column is the key for (x = 2; x <= NF; x++) { # Loop over remaining columns z = y[0]; # Store previous group prefix for comparison match($x, /[^@]+/, y); # Extract prefix before @ (the group) match($x, /@[0-9]+/, w); # Extract value after @ w_ = substr(w[0], 2); # Remove '@' prefix if (x == 2) { z = y[0]; # On first loop, set z to current group } # If same group, keep appending if (z == y[0]) { arr[0] = t; # First element: top-level identifier arr[1] = z; # Second: group push(arr, w_); # Then all group values } # Group changes: flush and reset if (z != y[0]) { parray(arr); split("", arr); # Clear array arr[0] = t; arr[1] = y[0]; push(arr, w_); } # Final column — always flush and reset if (x == NF) { parray(arr); split("", arr); next; } } } END { printf "\n"; # Clean newline at end } #HOW IT WORKS #cat file1 #dsfs,2@44,2@55,3@66,3@88,3@89,3@90,3@91,4@56,4@777,4@65,5@776,5@545 #afsf,1@00,1@99 #bdsa,2@323,2@212,6@432,6@2188 #awk -f reorganize_groups.awk <FILE> #dsfs;2;;44;55; #dsfs;3;;66;88;89;90;91; #dsfs;4;;56;777;65; #dsfs;5;;776;545; #afsf;1;;00;99; #bdsa;2;;323;212; #bdsa;6;;432;2188;