gen_multi_table.awk (3044B)
#!/usr/bin/awk -f
# gen_multi_table.awk
# A small utility I created when I need to some quick and dirty SQL data for multiple tables with joinable IDs.
#USAGE:
# The utility generates all tables into the same file (to keep the code here simple)
# awk '/^----- TABLE/{n++}{print > "table" n ".csv"}' <output of this script>
# to split them into distinct files
#PARAMS:
# tables = number of tables (default 2)
# rows = rows per table (default 10)
# columns = non-ID columns per table (default 4)
# digits = number of digits for random numbers (default 3)
# pos_only = 1 to disallow negatives (default 1)
# decimal = 1 to truncate to int, 0 to allow floats (default 1)
# delimiter= column separator (default ,)
# join_mode= 1 for joining everything to table 1, 0 for a "chain" of joins (default 0)
function isNegative() {
return rand();
}
function random(digits, pos_only) {
negative = isNegative();
if ((negative <= 0.5) && (pos_only == 0)) {
return (rand() * (10 ** digits)) * -1;
} else {
return (rand() * (10 ** digits));
}
}
function generateNumber(digits, pos_only, decimal, delimiter){
result = random(digits, pos_only);
if (decimal == 1) {
return int(result)""delimiter;
} else {
return result""delimiter;
}
}
function makeHeader(table_index, columns, delimiter) {
header = "id" delimiter;
if (table_index > 1) {
header = header "parent_id" delimiter;
}
for (k = 1; k <= columns; k++) {
if (k == columns) {
header = header "col" k;
} else {
header = header "col" k delimiter;
}
}
return header;
}
BEGIN {
if (tables == ""){tables = 2;}
if (rows == ""){rows = 5;}
if (columns == ""){columns = 3;}
if (digits == ""){digits = 3;}
if (pos_only == ""){pos_only = 1;}
if (decimal == ""){decimal = 1;}
if (delimiter == ""){delimiter = ",";}
if (join_mode == ""){join_mode = 0;}
DELIMITER_LEN = length(delimiter);
srand();
for (t = 1; t <= tables; t++) {
print "----- TABLE "t" -----";
print makeHeader(t, columns, delimiter);
for (i = 1; i <= rows; i++) {
id_val = int(rand() * (10 ** digits));
ids[t, i] = id_val;
line = id_val""delimiter;
if (t > 1) {
parent_index = int(rand() * rows) + 1;
if (join_mode == 0) {
parent_id = ids[t-1, parent_index];
} else {
parent_id = ids[1, parent_index];
}
line = line parent_id delimiter;
}
for (j = 0; j <= columns-1; j++) {
if (j + 1 >= columns-1)
line = line""generateNumber(digits, pos_only, decimal, delimiter);
else
line = line""generateNumber(digits, pos_only, decimal, delimiter);
}
print substr(line, 1, length(line)-DELIMITER_LEN);
}
print "";
}
}