Back (Current repo: scraps)

random scraps and notes that are useful to me
To clone this repository:
git clone https://git.viktor1993.net/scraps.git
Log | Download | Files | Refs

gen_multi_table_with_dates.awk (3622B)


#!/usr/bin/awk -f
# gen_multi_table.awk
# A small utility I created when I need to some quick and dirty SQL data for multiple tables with joinable IDs.

#USAGE:
# The utility generates all tables into the same file (to keep the code here simple)
# awk '/^----- TABLE/{n++}{print > "table" n ".csv"}' <output of this script>
# to split them into distinct files

#PARAMS:
# tables   = number of tables (default 2)
# rows     = rows per table (default 10)
# columns  = non-ID columns per table (default 4)
# digits   = number of digits for random numbers (default 3)
# pos_only = 1 to disallow negatives (default 1)
# decimal  = 1 to truncate to int, 0 to allow floats (default 1)
# delimiter= column separator (default ,)
# join_mode= 1 for joining everything to table 1, 0 for a "chain" of joins (default 0)
# gen_date = 1 if want table 1 to generate a column with dates (default 0)

function isNegative() {
    return rand();
}

function random(digits, pos_only) {
    negative = isNegative();
    if ((negative <= 0.5) && (pos_only == 0)) {
        return (rand() * (10 ** digits)) * -1;
    } else {
        return (rand() * (10 ** digits));
    }
}

function generateNumber(digits, pos_only, decimal, delimiter){
    result = random(digits, pos_only);
    if (decimal == 1) {
        return int(result)""delimiter;
    } else {
        return result""delimiter;
    }
}

function makeHeader(table_index, columns, delimiter, gen_date) {
    header = "id" delimiter;
    if ((table_index == 1) && (gen_date == 1)) {
        header = "id,stamp_created" delimiter;
    }
    if (table_index > 1) {
        header = header "parent_id" delimiter;
    }
    for (k = 1; k <= columns; k++) {
        if (k == columns) {
            header = header "col" k;
        } else {
            header = header "col" k delimiter;
        }
    }
    return header;
}

function generateDate(years_back, now, past, span, ts) {
    now = systime()
    past = now - (years_back * 365 * 24 * 60 * 60)
    span = now - past
    ts = past + int(rand() * span)
    return strftime("%Y-%m-%d", ts)
}

BEGIN {
    if (tables == ""){tables = 2;}
    if (rows == ""){rows = 5;}
    if (columns == ""){columns = 3;}
    if (digits == ""){digits = 3;}
    if (pos_only == ""){pos_only = 1;}
    if (decimal == ""){decimal = 1;}
    if (delimiter == ""){delimiter = ",";}
    if (join_mode == ""){join_mode = 0;}
    if (gen_date == ""){gen_date = 0;}
    DELIMITER_LEN = length(delimiter);

    srand();

    for (t = 1; t <= tables; t++) {
        print "----- TABLE "t" -----";
        print makeHeader(t, columns, delimiter, gen_date);

        for (i = 1; i <= rows; i++) {
            id_val = int(rand() * (10 ** digits));
            ids[t, i] = id_val;

            line = id_val""delimiter;

            if ((t == 1) && (gen_date == 1)) {
                line = line generateDate(10) delimiter;
            }

            if (t > 1) {
                parent_index = int(rand() * rows) + 1;
                if (join_mode == 0) {
                    parent_id = ids[t-1, parent_index];
                } else {
                    parent_id = ids[1, parent_index];
                }
                line = line parent_id delimiter;
            }

            for (j = 0; j <= columns-1; j++) {
                if (j + 1 >= columns-1)
                    line = line""generateNumber(digits, pos_only, decimal, delimiter);
                else
                    line = line""generateNumber(digits, pos_only, decimal, delimiter);
            }

            print substr(line, 1, length(line)-DELIMITER_LEN);
        }
        print "";
    }
}