#!/bin/bash
# mariadb-myguard-tuner — quick sanity check of a MariaDB install
# against the deb.myguard.nl tuning recommendations. Reports findings
# as a scorecard; never writes anything. Safe to run on production.
#
# Usage:
#   mariadb-myguard-tuner               # uses /run/mysqld/mysqld.sock as root via socket auth
#   mariadb-myguard-tuner -u USER -p    # prompt for password
#   mariadb-myguard-tuner --socket /path/to/socket
#
# Exit code: number of WARN / FAIL findings (0 == clean).

set -eu

MYSQL_ARGS=()
while [ $# -gt 0 ]; do
    case "$1" in
        -u) MYSQL_ARGS+=("-u" "$2"); shift 2 ;;
        -p) MYSQL_ARGS+=("-p"); shift ;;
        --socket) MYSQL_ARGS+=("--socket=$2"); shift 2 ;;
        -h|--help)
            sed -n '2,12p' "$0" | sed 's/^# \{0,1\}//'
            exit 0 ;;
        *) echo "Unknown arg: $1" >&2; exit 2 ;;
    esac
done

q() {
    mariadb --batch --skip-column-names "${MYSQL_ARGS[@]}" -e "$1" 2>/dev/null
}

# Verify we can connect before any check runs.
if ! q "SELECT 1" >/dev/null; then
    echo "FAIL: cannot connect to mariadb. Try: $0 -u root -p" >&2
    exit 2
fi

# Helpers — track WARN/FAIL count to set exit code.
warn_count=0
ok()   { printf '  \033[32mOK  \033[0m  %s\n' "$*"; }
warn() { printf '  \033[33mWARN\033[0m  %s\n' "$*"; warn_count=$((warn_count + 1)); }
fail() { printf '  \033[31mFAIL\033[0m  %s\n' "$*"; warn_count=$((warn_count + 1)); }
info() { printf '  ----  %s\n' "$*"; }

# ---------- Build identification -------------------------------------------
version=$(q "SELECT VERSION()")
echo "MariaDB version: ${version}"
case "${version}" in
    *myguard*) ok "running a myguard build" ;;
    *)         warn "not running a myguard build — this tuner is calibrated for the deb.myguard.nl defaults" ;;
esac

# ---------- InnoDB buffer pool sizing --------------------------------------
echo
echo "[ buffer pool ]"
total_ram_kb=$(grep -m1 MemTotal /proc/meminfo | awk '{print $2}')
total_ram_bytes=$((total_ram_kb * 1024))
pool_bytes=$(q "SELECT @@innodb_buffer_pool_size")
pool_pct=$(( pool_bytes * 100 / total_ram_bytes ))
info "innodb_buffer_pool_size = $((pool_bytes / 1024 / 1024)) MiB (${pool_pct} % of host RAM)"
if   [ "${pool_pct}" -lt 20 ] && [ "${total_ram_bytes}" -gt $((4 * 1024**3)) ]; then
    warn "buffer pool is under 20 % of RAM on a host with >4 GiB — consider raising it"
elif [ "${pool_pct}" -gt 80 ]; then
    fail "buffer pool exceeds 80 % of RAM — risk of OOM under load"
else
    ok "buffer pool size looks sane for this host"
fi

# ---------- I/O capacity ---------------------------------------------------
echo
echo "[ I/O tuning ]"
iocap=$(q "SELECT @@innodb_io_capacity")
flushm=$(q "SELECT @@innodb_flush_method")
neigh=$(q "SELECT @@innodb_flush_neighbors")
[ "${iocap}" -ge 1000 ] && ok "innodb_io_capacity = ${iocap}" \
                       || warn "innodb_io_capacity = ${iocap} — HDD-era default; set ≥2000 on SSD"
[ "${flushm}" = "O_DIRECT" ] && ok "innodb_flush_method = O_DIRECT" \
                            || warn "innodb_flush_method = ${flushm} — recommend O_DIRECT on SSD"
[ "${neigh}" = "0" ] && ok "innodb_flush_neighbors = 0 (SSD-tuned)" \
                    || warn "innodb_flush_neighbors = ${neigh} — set 0 on SSD"

# ---------- Threading ------------------------------------------------------
echo
echo "[ threading ]"
tphdl=$(q "SELECT @@thread_handling")
[ "${tphdl}" = "pool-of-threads" ] && ok "thread_handling = pool-of-threads" \
                                  || warn "thread_handling = ${tphdl} — pool-of-threads scales better past ~100 connections"

# ---------- Slow query log -------------------------------------------------
echo
echo "[ observability ]"
slow=$(q "SELECT @@slow_query_log")
[ "${slow}" = "1" ] && ok "slow_query_log enabled" \
                   || warn "slow_query_log disabled — turn on (cost = ~0) to catch latency outliers"

# ---------- Query cache (should be off) ------------------------------------
qct=$(q "SELECT @@query_cache_type")
[ "${qct}" = "OFF" ] || [ "${qct}" = "0" ] && ok "query cache off (deprecated, contention point)" \
                                          || fail "query_cache_type = ${qct} — disable, it's a contention point"

# ---------- Connection load --------------------------------------------------
echo
echo "[ connection load ]"
maxc=$(q "SELECT @@max_connections")
maxused=$(q "SHOW GLOBAL STATUS LIKE 'Max_used_connections'" | awk '{print $2}')
info "max_connections = ${maxc}, peak seen = ${maxused}"
if [ "${maxused}" -ge "$((maxc * 90 / 100))" ]; then
    warn "peak ${maxused} is within 10 % of max_connections=${maxc} — raise the cap"
else
    ok "headroom available on connection cap"
fi

echo
if [ "${warn_count}" -eq 0 ]; then
    printf '\033[32mAll checks passed.\033[0m\n'
else
    printf '\033[33m%d finding(s) above. See https://deb.myguard.nl for tuning notes.\033[0m\n' "${warn_count}"
fi
exit "${warn_count}"
