shelley_

· qlyntraex's pastes · raw

expires: 2026-07-29

  1#!/bin/sh
  2# Munin multigraph plugin: shelley_
  3#
  4# Monitors a Shelley instance via its SQLite database.
  5#
  6# Graphs:
  7#   shelley_conversations — total conversations (DERIVE, shows creation rate)
  8#   shelley_messages     — messages by role: user, agent, tool, system (DERIVE)
  9#   shelley_requests     — LLM requests by model (DERIVE, stacked)
 10#   shelley_latency      — average request duration by model (GAUGE)
 11#   shelley_tokens       — input/output tokens by model (DERIVE, stacked)
 12#   shelley_cache        — cache read vs creation tokens by model (GAUGE)
 13#
 14# Config:
 15#   [shelley_]
 16#   env.SHELLEY_DB /home/exedev/.config/shelley/shelley.db
 17#   user root
 18#
 19# Note: munin-node on Debian runs with ProtectHome=true by default,
 20# which hides /home/. Either set ProtectHome=read-only via a systemd
 21# drop-in override, or place the DB outside /home/.
 22#
 23#%# family=manual
 24#%# capabilities=autoconf
 25
 26SHELLEY_DB="${SHELLEY_DB:-/home/exedev/.config/shelley/shelley.db}"
 27
 28# --- helpers ---------------------------------------------------------------
 29
 30sqlite_query() {
 31    sqlite3 "$SHELLEY_DB" "$1"
 32}
 33
 34# Sanitize a model name into a valid munin field name:
 35# must match ^[A-Za-z_][A-Za-z0-9_]*$
 36sanitize_field() {
 37    echo "$1" | sed 's/[^A-Za-z0-9_]/_/g; s/^[0-9]/_/'
 38}
 39
 40# --- autoconf --------------------------------------------------------------
 41
 42if [ "${1:-}" = "autoconf" ]; then
 43    if [ ! -f "$SHELLEY_DB" ]; then
 44        echo "no (database not found at ${SHELLEY_DB})"
 45        exit 0
 46    fi
 47    if ! sqlite3 "$SHELLEY_DB" "SELECT 1" >/dev/null 2>&1; then
 48        echo "no (cannot query database at ${SHELLEY_DB})"
 49        exit 0
 50    fi
 51    echo "yes"
 52    exit 0
 53fi
 54
 55# --- discover models -------------------------------------------------------
 56
 57# Get distinct models from llm_requests for request/latency graphs
 58MODELS_REQUESTS=$(sqlite_query "SELECT DISTINCT model FROM llm_requests ORDER BY model;")
 59# Get distinct models from messages.usage_data for token/cache graphs
 60MODELS_TOKENS=$(sqlite_query "SELECT DISTINCT json_extract(usage_data, '$.model') FROM messages WHERE usage_data IS NOT NULL AND json_extract(usage_data, '$.input_tokens') > 0 ORDER BY 1;")
 61
 62# --- config ----------------------------------------------------------------
 63
 64if [ "${1:-}" = "config" ]; then
 65
 66    # --- shelley_conversations ---
 67    cat <<'EOF'
 68multigraph shelley_conversations
 69graph_title Shelley conversations
 70graph_vlabel conversations / ${graph_period}
 71graph_category shelley
 72graph_args -l 0 --base 1000
 73graph_info Total conversations tracked. Munin computes the creation rate per period.
 74conversations.label Conversations
 75conversations.type DERIVE
 76conversations.min 0
 77
 78EOF
 79
 80    # --- shelley_messages ---
 81    cat <<'EOF'
 82multigraph shelley_messages
 83graph_title Shelley messages by role
 84graph_vlabel messages / ${graph_period}
 85graph_category shelley
 86graph_args -l 0 --base 1000
 87graph_info Message count by role. Munin computes the rate per period.
 88user.label User
 89user.type DERIVE
 90user.min 0
 91user.draw AREA
 92agent.label Agent
 93agent.type DERIVE
 94agent.min 0
 95agent.draw STACK
 96tool.label Tool
 97tool.type DERIVE
 98tool.min 0
 99tool.draw STACK
100system.label System
101system.type DERIVE
102system.min 0
103system.draw STACK
104
105EOF
106
107    # --- shelley_requests ---
108    echo "multigraph shelley_requests"
109    echo "graph_title Shelley LLM requests by model"
110    echo "graph_vlabel requests / ${graph_period}"
111    echo "graph_category shelley"
112    echo "graph_args -l 0 --base 1000"
113    echo "graph_info LLM request count by model. Munin computes the rate per period."
114    FIRST=1
115    echo "$MODELS_REQUESTS" | while read -r model; do
116        [ -z "$model" ] && continue
117        field=$(sanitize_field "$model")
118        echo "${field}.label ${model}"
119        echo "${field}.type DERIVE"
120        echo "${field}.min 0"
121        if [ "$FIRST" = 1 ]; then
122            echo "${field}.draw AREA"
123            FIRST=0
124        else
125            echo "${field}.draw STACK"
126        fi
127    done
128    echo
129
130    # --- shelley_latency ---
131    echo "multigraph shelley_latency"
132    echo "graph_title Shelley LLM request latency"
133    echo "graph_vlabel ms"
134    echo "graph_category shelley"
135    echo "graph_args -l 0 --base 1000"
136    echo "graph_info Average request duration in milliseconds by model."
137    echo "$MODELS_REQUESTS" | while read -r model; do
138        [ -z "$model" ] && continue
139        field=$(sanitize_field "$model")
140        echo "${field}.label ${model}"
141        echo "${field}.type GAUGE"
142        echo "${field}.min 0"
143    done
144    echo
145
146    # --- shelley_tokens ---
147    echo "multigraph shelley_tokens"
148    echo "graph_title Shelley token throughput by model"
149    echo "graph_vlabel tokens / ${graph_period}"
150    echo "graph_category shelley"
151    echo "graph_args -l 0 --base 1000"
152    echo "graph_info Input and output token counts by model. Munin computes the rate per period."
153    echo "$MODELS_TOKENS" | while read -r model; do
154        [ -z "$model" ] && continue
155        field=$(sanitize_field "$model")
156        echo "${field}_in.label ${model} input
157${field}_in.type DERIVE
158${field}_in.min 0
159${field}_out.label ${model} output
160${field}_out.type DERIVE
161${field}_out.min 0
162${field}_out.draw STACK"
163    done
164    echo
165
166    # --- shelley_cache ---
167    echo "multigraph shelley_cache"
168    echo "graph_title Shelley cache efficiency by model"
169    echo "graph_vlabel tokens"
170    echo "graph_category shelley"
171    echo "graph_args -l 0 --base 1000"
172    echo "graph_info Prompt cache read vs creation tokens by model. High cache_read means good cache hit rate."
173    echo "$MODELS_TOKENS" | while read -r model; do
174        [ -z "$model" ] && continue
175        field=$(sanitize_field "$model")
176        echo "${field}_read.label ${model} cache read
177${field}_read.type GAUGE
178${field}_read.min 0
179${field}_read.draw AREA
180${field}_create.label ${model} cache create
181${field}_create.type GAUGE
182${field}_create.min 0
183${field}_create.draw STACK"
184    done
185    echo
186
187    exit 0
188fi
189
190# --- fetch (values) --------------------------------------------------------
191
192# conversations
193CONV_COUNT=$(sqlite_query "SELECT COUNT(*) FROM conversations;")
194echo "multigraph shelley_conversations"
195echo "conversations.value ${CONV_COUNT:-U}"
196echo
197
198# messages by role
199echo "multigraph shelley_messages"
200for role in user agent tool system; do
201    COUNT=$(sqlite_query "SELECT COUNT(*) FROM messages WHERE type='${role}';")
202    echo "${role}.value ${COUNT:-U}"
203done
204echo
205
206# requests by model
207echo "multigraph shelley_requests"
208echo "$MODELS_REQUESTS" | while read -r model; do
209    [ -z "$model" ] && continue
210    field=$(sanitize_field "$model")
211    COUNT=$(sqlite_query "SELECT COUNT(*) FROM llm_requests WHERE model='${model}';")
212    echo "${field}.value ${COUNT:-U}"
213done
214echo
215
216# latency by model
217echo "multigraph shelley_latency"
218echo "$MODELS_REQUESTS" | while read -r model; do
219    [ -z "$model" ] && continue
220    field=$(sanitize_field "$model")
221    AVG=$(sqlite_query "SELECT CAST(AVG(duration_ms) AS INTEGER) FROM llm_requests WHERE model='${model}' AND duration_ms IS NOT NULL;")
222    echo "${field}.value ${AVG:-U}"
223done
224echo
225
226# tokens by model
227echo "multigraph shelley_tokens"
228echo "$MODELS_TOKENS" | while read -r model; do
229    [ -z "$model" ] && continue
230    field=$(sanitize_field "$model")
231    # usage_data is a JSON array; extract per-message token counts for matching model
232    IN_TOKENS=$(sqlite_query "SELECT COALESCE(SUM(json_extract(usage_data, '$.input_tokens')),0) FROM messages WHERE usage_data IS NOT NULL AND json_extract(usage_data, '$.model')='${model}' AND json_extract(usage_data, '$.input_tokens') > 0;")
233    OUT_TOKENS=$(sqlite_query "SELECT COALESCE(SUM(json_extract(usage_data, '$.output_tokens')),0) FROM messages WHERE usage_data IS NOT NULL AND json_extract(usage_data, '$.model')='${model}' AND json_extract(usage_data, '$.input_tokens') > 0;")
234    echo "${field}_in.value ${IN_TOKENS:-U}"
235    echo "${field}_out.value ${OUT_TOKENS:-U}"
236done
237echo
238
239# cache by model
240echo "multigraph shelley_cache"
241echo "$MODELS_TOKENS" | while read -r model; do
242    [ -z "$model" ] && continue
243    field=$(sanitize_field "$model")
244    CACHE_READ=$(sqlite_query "SELECT COALESCE(SUM(json_extract(usage_data, '$.cache_read_input_tokens')),0) FROM messages WHERE usage_data IS NOT NULL AND json_extract(usage_data, '$.model')='${model}' AND json_extract(usage_data, '$.input_tokens') > 0;")
245    CACHE_CREATE=$(sqlite_query "SELECT COALESCE(SUM(json_extract(usage_data, '$.cache_creation_input_tokens')),0) FROM messages WHERE usage_data IS NOT NULL AND json_extract(usage_data, '$.model')='${model}' AND json_extract(usage_data, '$.input_tokens') > 0;")
246    echo "${field}_read.value ${CACHE_READ:-U}"
247    echo "${field}_create.value ${CACHE_CREATE:-U}"
248done