group_by_column.awk (789B)
#!/usr/bin/awk -f
#
# group_by_column.awk -- collapse CSV rows that share the same group key.
#
# Backstory: the data format from the data provider made it necessary to
# compress some rows before adding them to the database, to reduce table
# sizes and boost JOIN speeds. This step compresses rows down to fewer rows,
# which cut one table by ~60% and gave a decent boost on the DB side.
# Also see: reorganize_groups.awk
#
# Usage:
#   awk -f group_by_column.awk input.csv > output.csv
#
# Input : comma-separated records; field 3 is the group key.
# Output: one record per group key, of the form
#           <field3>,<field2>@<field1>[,<field2>@<field1>...]
#         Groups are printed in the order in which they first appear in the
#         input, and entries inside a group keep their input order.
#
# Example
#   IN:
#     foo,123,groupA
#     bar,456,groupA
#     baz,789,groupB
#   OUT:
#     groupA,123@foo,456@bar
#     groupB,789@baz

BEGIN {
    FS = ","
    OFS = ","
}

# A completely empty line (e.g. a trailing blank line) carries no data; without
# this guard it would create a bogus group with an empty key and an "@" entry.
NF == 0 { next }

{
    group = $3
    entry = $2 "@" $1

    # Use "in" for the membership test: it checks for the key itself instead of
    # relying on the truthiness of the stored value, and it does not create
    # the element as a side effect.
    if (group in members) {
        members[group] = members[group] "," entry
    } else {
        # First entry for this group: remember when we first saw it, because
        # "for (key in array)" iterates in an unspecified order.
        order[++group_count] = group
        members[group] = entry
    }
}

END {
    # Emit groups in first-seen order so the output is deterministic.
    for (i = 1; i <= group_count; i++) {
        print order[i], members[order[i]]
    }
}