group_by_column_regex.awk (764B)
#!/bin/awk
# Append value to the end of arr, treating arr as a 1-based list.
#   arr       - array whose elements occupy indices 1..length(arr)
#   value     - element to store in the next free slot
#   next_slot - local variable (callers must not pass it)
function push(arr, value,    next_slot) {
    next_slot = length(arr) + 1
    arr[next_slot] = value
}
# Helper to print the array in semicolon-delimited format: every element is
# written followed by ";", and the line is terminated with a newline.
#   arr  - array whose elements occupy indices 1..length(arr), as filled by push()
#   i, n - local variables (callers must not pass them)
function parray(arr,    i, n) {
    # Take the size once, before the loop touches the array.
    n = length(arr);
    # Indices start at 1 to match push(). Reading arr[0] would create that
    # element as a side effect, printing an empty leading field and growing
    # the caller's array.
    for (i = 1; i <= n; i++) {
        # Explicit "%s" so a "%" inside the data is never interpreted as a
        # format directive.
        printf "%s;", arr[i];
    }
    printf "\n";
}
# One-time setup before any input is read.
#   FS / OFS - fields are comma-separated on input and output
#   regex    - pattern for the current group; empty until a group row is seen
#   prefix   - length of $1 that marks the start of a new group; may be
#              supplied by the caller (e.g. -v prefix=3), defaults to 2
#   a        - accumulator for the rows of the current group
BEGIN {
    FS = OFS = ","
    regex = ""
    if (prefix == "")
        prefix = 2
    # split() with an empty string is the portable way to clear an array.
    split("", a)
}
# Per-record rule: collect rows into groups keyed by a short leading code.
# e.g. we want to group together rows that begin with the same two numbers,
# like 50, 501, 5010, etc., because the file contains rows as a hierarchy.
#
# A row whose first field is exactly `prefix` characters long starts a new
# group: the previous group (if any) is printed and the accumulator is reset.
# Any other row is added to the current group when its first field begins
# with the group's key. Each stored entry has the form "<$1>@<$2>".
{
    if (length($1) == prefix) {
        if (length(a) > 0) {
            parray(a);
        }
        split("", a);
        # Keep the key as a plain string (the `""` forces string type) so the
        # comparison below is a literal, character-by-character prefix test.
        group_key = $1 "";
        push(a, $1 "@" $2);
    } else if (substr($1, 1, length(group_key)) == group_key) {
        # Literal prefix comparison instead of matching against "^" $1 as a
        # dynamic regex: regex metacharacters in the data (".", "+", "(" ...)
        # can no longer cause false matches or regex syntax errors.
        # While no group row has been seen yet group_key is empty and every
        # row matches, which is what the empty regex did before.
        push(a, $1 "@" $2);
    }
}
# After the last record: flush the group that is still being accumulated.
END {
    # Only print when something was collected, mirroring the check done in
    # the per-record rule; otherwise empty input would produce a blank line.
    if (length(a) > 0) {
        parray(a);
    }
}