group_by_column_regex.awk (764B)
#!/bin/awk -f
#
# Groups hierarchical comma-separated rows by the code in column 1.
#
# A row whose first field is exactly `prefix` characters long starts a new
# group; following rows whose first field begins with that same code are
# appended to the group. Each group is printed on one line as
# "<col1>@<col2>;" entries, e.g. rows 50, 501, 5010 end up together.
#
# Usage: awk -f group_by_column_regex.awk [-v prefix=N] file.csv
#   prefix  length of a group-header code in column 1 (defaults to 2)

# Append value to arr, using consecutive 1-based integer indices.
function push(arr, value) {
    arr[length(arr) + 1] = value
}

# Print arr[1..n] on a single line, each element followed by ";".
# `i` and `n` are locals (awk's extra-parameter idiom); callers pass only arr.
function parray(arr,    i, n) {
    # Hoist the length: it is loop-invariant, and indices start at 1 to
    # match push(). Reading arr[0] would silently create that element.
    n = length(arr)
    for (i = 1; i <= n; i++) {
        # Explicit "%s" so a "%" inside the data is never taken as a format.
        printf "%s;", arr[i]
    }
    printf "\n"
}

BEGIN {
    FS = ","
    OFS = ","
    # Empty key matches every row, so rows seen before the first group
    # header are still collected together.
    key = ""
    if (prefix == "") {
        prefix = 2
    }
    split("", a)    # portable way to make `a` an empty array
}

{
    if (length($1) == prefix) {
        # New group header: flush the previous group, then start over.
        if (length(a) > 0) {
            parray(a)
        }
        split("", a)
        key = $1 ""    # force string so the comparison below is textual
        push(a, $1 "@" $2)
    } else if (substr($1, 1, length(key)) == key) {
        # Literal prefix test: regex metacharacters in the key cannot
        # over-match the way a dynamic "^" key pattern would.
        push(a, $1 "@" $2)
    }
}

END {
    # Flush the final group; print nothing when no rows were collected.
    if (length(a) > 0) {
        parray(a)
    }
}