# syntaxai/tdd.md · main · tools / sama-cli / src / b32_utils.sh
# b32 — logic: helpers shared across the seven §4 checks. Pure
# functions: profile parsing, file walking, declared-layer lookup,
# relative-import collection, string/comment masking, line counting,
# and output helpers. Sourced by b32_checks.sh and d21_main.sh.
# Never reads files outside the explicitly-passed paths.
# Profile state — module-level globals populated by parse_profile.
# The four PROFILE_* arrays are parallel: index i in each of them
# describes the same profile entry (one entry per declared prefix).
# PROFILE_LAYERS[i] = "0" | "1" | "2" | "3"
# PROFILE_PREFIXES[i] = "a31_" etc.
# PROFILE_SUBLAYER_NAMES[i] = "default" | "logic" etc.
# PROFILE_SUBLAYER_INDEXES[i] = "0" | "1" ...
# Top-level scalars of the profile file:
PROFILE_NAME=""
PROFILE_SAMA_VERSION=""
# Source-file extension; ".ts" is the default and is also what
# parse_profile falls back to when the profile sets no extension.
PROFILE_EXTENSION=".ts"
# Parallel arrays start empty; parse_profile resets them on every call.
PROFILE_LAYERS=()
PROFILE_PREFIXES=()
PROFILE_SUBLAYER_NAMES=()
PROFILE_SUBLAYER_INDEXES=()
# — parse_profile -------------------------------------------------
# Loads a sama.profile.toml file into the PROFILE_* globals.
# Arguments: $1 - path to the profile file
# Globals:   writes PROFILE_NAME, PROFILE_SAMA_VERSION,
#            PROFILE_EXTENSION and the four parallel PROFILE_* arrays
#            (the arrays are emptied first, so repeated calls do not
#            accumulate entries).
# Returns:   the status of the last _parse_layer_section call.
parse_profile() {
  local file="$1"
  local ext layer_num

  # Top-level scalars.
  PROFILE_NAME="$(_extract_top_scalar "$file" profile)"
  PROFILE_SAMA_VERSION="$(_extract_top_scalar "$file" sama_version)"
  ext="$(_extract_top_scalar "$file" extension)"
  # A missing or empty extension means the default ".ts".
  PROFILE_EXTENSION="${ext:-.ts}"

  # Start from a clean slate, then append layers 0..3 in order.
  PROFILE_LAYERS=()
  PROFILE_PREFIXES=()
  PROFILE_SUBLAYER_NAMES=()
  PROFILE_SUBLAYER_INDEXES=()
  for layer_num in 0 1 2 3; do
    _parse_layer_section "$file" "$layer_num"
  done
}
# Extract a scalar key=value pair from the top-of-file section
# (before any [section] header) and print its value.
# Arguments: $1 - profile file, $2 - key name
# Outputs:   the value of the first matching key on stdout, with one
#            pair of surrounding quotes (" or ') removed; nothing if
#            the key does not appear before the first section header.
# A "#" starts a comment only when it is outside a quoted value, so
# `profile = "c#-strict"  # note` yields `c#-strict`.
_extract_top_scalar() {
  local file="$1"
  local key="$2"
  awk -v key="$key" '
    # Top-level keys end at the first [section] header.
    /^[[:space:]]*\[/ { exit }
    {
      line = $0
      sub(/^[[:space:]]+/, "", line)
      # Blank lines and whole-line comments carry no key.
      if (line == "" || substr(line, 1, 1) == "#") next
      eq = index(line, "=")
      if (eq == 0) next
      k = substr(line, 1, eq - 1)
      sub(/[[:space:]]+$/, "", k)
      if (k != key) next
      v = substr(line, eq + 1)
      sub(/^[[:space:]]+/, "", v)
      # Cut the value at the first "#" that is not inside quotes.
      # \047 is the single quote (portable octal escape).
      inq = ""
      n = length(v)
      for (i = 1; i <= n; i++) {
        ch = substr(v, i, 1)
        if (inq != "") {
          # Backslash escapes only exist in double-quoted strings.
          if (ch == "\\" && inq == "\"") { i++; continue }
          if (ch == inq) inq = ""
        } else if (ch == "\"" || ch == "\047") {
          inq = ch
        } else if (ch == "#") {
          v = substr(v, 1, i - 1)
          break
        }
      }
      sub(/[[:space:]]+$/, "", v)
      first = substr(v, 1, 1)
      last = substr(v, length(v), 1)
      if ((first == "\"" && last == "\"") || (first == "\047" && last == "\047")) {
        v = substr(v, 2, length(v) - 2)
      }
      print v
      exit
    }
  ' "$file"
}
# Parse one [layers.N] section into the PROFILE_* arrays.
# Arguments: $1 - profile file, $2 - layer number N
# Globals:   appends to PROFILE_LAYERS, PROFILE_PREFIXES,
#            PROFILE_SUBLAYER_NAMES and PROFILE_SUBLAYER_INDEXES
# Returns:   0 (a missing or empty section appends nothing)
#
# Two mutually exclusive forms are recognised inside the section:
#   prefixes  = ["a31_", "a32_"]            single-line array; every
#                                           entry gets sublayer "default"
#   sublayers = [ { name = "..", prefix = ".." }, ... ]
#                                           single- or multi-line
# If a `prefixes` line is present, `sublayers` is not looked at.
_parse_layer_section() {
  local file="$1"
  local layer="$2"
  local idx=0
  local body

  # Collect the raw lines that belong to [layers.<layer>].
  body="$(awk -v target="layers.$layer" '
    /^[[:space:]]*\[/ {
      sec = $0
      sub(/^[[:space:]]*\[/, "", sec)
      sub(/\].*$/, "", sec)
      gsub(/[[:space:]]/, "", sec)
      in_target = (sec == target) ? 1 : 0
      next
    }
    in_target == 1 { print }
  ' "$file")"
  [ -z "$body" ] && return 0

  # prefixes = ["a", "b"] (single-line array)
  local prefixes_line
  prefixes_line="$(printf '%s\n' "$body" \
    | grep -E '^[[:space:]]*prefixes[[:space:]]*=' | head -1)"
  if [ -n "$prefixes_line" ]; then
    local raw="${prefixes_line#*=}"
    # Keep only what sits between the brackets, so a trailing
    # "# comment" after the closing bracket cannot leak into a prefix.
    raw="${raw#*\[}"
    raw="${raw%%\]*}"
    local -a parts=()
    local p
    # read -a splits on commas without glob-expanding the elements
    # and without leaving a modified IFS behind.
    IFS=',' read -r -a parts <<< "$raw"
    for p in ${parts[@]+"${parts[@]}"}; do
      # Drop all whitespace and both kinds of quotes.
      p=${p//[[:space:]]/}
      p=${p//\"/}
      p=${p//\'/}
      [ -z "$p" ] && continue
      PROFILE_LAYERS+=("$layer")
      PROFILE_PREFIXES+=("$p")
      PROFILE_SUBLAYER_NAMES+=("default")
      PROFILE_SUBLAYER_INDEXES+=("$idx")
      idx=$((idx + 1))
    done
    return 0
  fi

  # sublayers = [ { ... }, { ... } ] (single or multi-line)
  local sublayers_block
  sublayers_block="$(printf '%s\n' "$body" | awk '
    /^[[:space:]]*sublayers[[:space:]]*=[[:space:]]*\[/ {
      # Keep whatever follows the opening bracket: in the single-line
      # form that is the whole array, in the multi-line form it may
      # be the first table.
      line = $0
      sub(/^[^[]*\[/, "", line)
      in_block = 1
      if (line ~ /\][[:space:]]*$/) {
        sub(/\][[:space:]]*$/, "", line)
        in_block = 0
      }
      print line
      next
    }
    in_block == 1 && /^[[:space:]]*\]/ { in_block = 0; next }
    in_block == 1 { print }
  ')"
  [ -n "$sublayers_block" ] || return 0

  # Re-cut the block into one record per inline table: join all
  # lines, then break after every closing brace. This makes
  # `[ {..}, {..} ]` on one line yield one entry per table instead
  # of a single entry for the whole line.
  local record name prefix
  while IFS= read -r record || [ -n "$record" ]; do
    name="$(printf '%s\n' "$record" \
      | sed -n 's/.*name[[:space:]]*=[[:space:]]*"\([^"]*\)".*/\1/p')"
    prefix="$(printf '%s\n' "$record" \
      | sed -n 's/.*prefix[[:space:]]*=[[:space:]]*"\([^"]*\)".*/\1/p')"
    if [ -n "$name" ] && [ -n "$prefix" ]; then
      PROFILE_LAYERS+=("$layer")
      PROFILE_PREFIXES+=("$prefix")
      PROFILE_SUBLAYER_NAMES+=("$name")
      PROFILE_SUBLAYER_INDEXES+=("$idx")
      idx=$((idx + 1))
    fi
  done < <(printf '%s\n' "$sublayers_block" | tr '\n' ' ' | tr '}' '\n')
  return 0
}
# — declared_layer ------------------------------------------------
# Looks up the layer a file declares through its name prefix.
# Arguments: $1 - path (only the part after the last "/" is used)
# Globals:   reads the parallel PROFILE_* arrays
# Outputs:   "<layer> <sublayer_name> <sublayer_index> <prefix>" for
#            the FIRST profile entry whose prefix starts the basename
# Returns:   0 on a match, 1 when no prefix matches
declared_layer() {
  local path="$1"
  local filename="${path##*/}"
  local slot candidate
  for slot in "${!PROFILE_PREFIXES[@]}"; do
    candidate="${PROFILE_PREFIXES[$slot]}"
    # Quoted part is literal; the trailing * makes it a prefix test.
    if [[ "$filename" == "$candidate"* ]]; then
      printf '%s %s %s %s\n' \
        "${PROFILE_LAYERS[$slot]}" \
        "${PROFILE_SUBLAYER_NAMES[$slot]}" \
        "${PROFILE_SUBLAYER_INDEXES[$slot]}" \
        "$candidate"
      return 0
    fi
  done
  return 1
}
# — all_prefix_matches --------------------------------------------
# Lists every profile prefix the file name starts with.
# Arguments: $1 - path (only the part after the last "/" is used)
# Globals:   reads PROFILE_PREFIXES and PROFILE_LAYERS
# Outputs:   one "<layer> <prefix>" line per matching entry, in
#            profile order; nothing when no prefix matches
all_prefix_matches() {
  local path="$1"
  local filename="${path##*/}"
  local slot candidate
  for slot in "${!PROFILE_PREFIXES[@]}"; do
    candidate="${PROFILE_PREFIXES[$slot]}"
    if [[ "$filename" == "$candidate"* ]]; then
      printf '%s %s\n' "${PROFILE_LAYERS[$slot]}" "$candidate"
    fi
  done
}
# — file classifiers ----------------------------------------------
# Succeeds when <path> is a source file of the active profile:
# it ends with PROFILE_EXTENSION but not with ".test<extension>".
# Arguments: $1 - path
# Returns:   0 for a non-test source file, 1 otherwise
is_sama_file() {
  local path="$1"
  local ext="$PROFILE_EXTENSION"
  # Wrong extension: not a source file at all.
  [[ "$path" == *"$ext" ]] || return 1
  # Right extension: accept unless it is a test file.
  [[ "$path" != *".test$ext" ]]
}
# Succeeds when <path> ends with ".test<PROFILE_EXTENSION>".
# Arguments: $1 - path
# Returns:   0 for a test file, 1 otherwise
is_test_file() {
  local path="$1"
  local test_suffix=".test${PROFILE_EXTENSION}"
  [[ "$path" == *"$test_suffix" ]]
}
# — list_repo_files -----------------------------------------------
# Walks <src_dir> non-recursively and emits one repo-relative path
# per line for every regular file ending in PROFILE_EXTENSION.
# Arguments: $1 - repo_root, stripped (with its trailing "/") from the
#                 front of each path
#            $2 - src_dir, the directory to list (depth 1 only)
# Outputs:   sorted paths on stdout; find errors (e.g. a missing
#            directory) are suppressed, giving empty output
# NOTE(review): `sort` follows the current locale's collation; run
# under LC_ALL=C if byte-wise ordering is required.
list_repo_files() {
  local repo_root="$1"
  local src_dir="$2"
  local ext="$PROFILE_EXTENSION"
  local path
  find "$src_dir" -mindepth 1 -maxdepth 1 -type f -name "*${ext}" 2>/dev/null \
    | sort \
    | while IFS= read -r path; do
        # The inner quotes make repo_root a literal string rather
        # than a glob pattern, so roots containing [, ], * or ? are
        # stripped correctly.
        printf '%s\n' "${path#"${repo_root}"/}"
      done
}
# — collect_imports_ts --------------------------------------------
# Reads TypeScript source on stdin and prints, one per line, the
# specifier of every `from "./xxx.ts"` / `from './xxx.ts'` import
# that is NOT inside a JS/TS string literal or comment. Specifiers
# are printed exactly as written (still starting with "./");
# collect_imports turns them into repo-relative paths.
#
# Only specifiers that start with "./" and end with ".ts" are
# printed, and the opening and closing quote must be on the same
# line as `from`.
#
# Implemented as a single awk pass that tracks string + comment
# state character-by-character. Block-comment and string state
# carry over from one line to the next; line-comment state is reset
# at each end of line. Mirrors collectRelativeImports +
# stripStringsAndComments in src/a31_sama_v2.ts.
_emit_ts_imports_in() {
  awk '
    {
      line = $0
      len = length(line)
      idx = 1
      while (idx <= len) {
        c = substr(line, idx, 1)
        d = (idx < len) ? substr(line, idx + 1, 1) : ""
        if (in_comment_line) {
          idx++
          continue
        }
        if (in_comment_block) {
          if (c == "*" && d == "/") { in_comment_block = 0; idx += 2; continue }
          idx++
          continue
        }
        if (in_string != "") {
          # A backslash escapes the next character, whatever it is.
          if (c == "\\") { idx += 2; continue }
          if (c == in_string) in_string = ""
          idx++
          continue
        }
        if (c == "/" && d == "/") { in_comment_line = 1; idx += 2; continue }
        if (c == "/" && d == "*") { in_comment_block = 1; idx += 2; continue }
        # \047 is the single quote (portable octal escape).
        if (c == "\"" || c == "\047" || c == "`") { in_string = c; idx++; continue }
        # Look for the from keyword (with word-boundary before)
        if (c == "f" && substr(line, idx, 4) == "from") {
          prev = (idx == 1) ? "" : substr(line, idx - 1, 1)
          if (prev !~ /[A-Za-z0-9_$]/) {
            j = idx + 4
            while (j <= len && substr(line, j, 1) ~ /[ \t]/) j++
            if (j <= len) {
              q = substr(line, j, 1)
              if (q == "\"" || q == "\047") {
                k = j + 1
                start = k
                while (k <= len && substr(line, k, 1) != q) k++
                if (k <= len) {
                  path = substr(line, start, k - start)
                  if (substr(path, 1, 2) == "./" && substr(path, length(path) - 2) == ".ts") {
                    print path
                  }
                  # Resume after the closing quote so the specifier
                  # is not re-scanned as an ordinary string.
                  idx = k + 1
                  continue
                }
              }
            }
          }
        }
        idx++
      }
      in_comment_line = 0
    }
  '
}
# — collect_imports_sh --------------------------------------------
# Reads shell source on stdin and prints, one per line, the file
# named by every `# sama-import: xxx.sh` annotation comment.
#
# Shell sourcing is too heterogeneous to parse robustly (paths via
# ${VAR}, $BASH_SOURCE, parameter expansion, etc.), so each import
# is declared with an explicit comment line that the verifier takes
# as ground truth. The actual `source` / `.` invocations are not
# parsed. An annotation is only recognised when the comment starts
# the line (leading whitespace allowed) and the name consists of
# letters, digits, "_", "." and "-" followed by ".sh".
_emit_sh_imports_in() {
  awk '
    {
      if ($0 !~ /^[[:space:]]*#[[:space:]]*sama-import:[[:space:]]*[a-zA-Z0-9_.-]+\.sh/) next
      target = $0
      # Drop everything up to and including the marker ...
      sub(/.*sama-import:[[:space:]]*/, "", target)
      # ... and everything from the first whitespace onwards.
      sub(/[[:space:]].*$/, "", target)
      if (length(target) > 0) print target
    }
  '
}
# Emits one repo-relative path per line for every relative import
# declared in <file>.
# Arguments: $1 - file, repo-relative path of the importing file
#            $2 - repo_root, directory that <file> is relative to
# Globals:   PROFILE_EXTENSION selects the scanner: ".ts" uses
#            _emit_ts_imports_in, anything else _emit_sh_imports_in
# Outputs:   each import, with a leading "./" removed, joined to the
#            directory part of <file>. A file that sits directly in
#            the repo root (no "/" in <file>) yields the bare import
#            name rather than "<file>/<import>".
collect_imports() {
  local file="$1"
  local repo_root="$2"
  local raw_file="$repo_root/$file"
  local dir_prefix=""
  local emitter="_emit_sh_imports_in"
  local imp

  # ${file%/*} returns <file> unchanged when it has no slash, so
  # only derive a directory prefix when there really is one.
  case "$file" in
    */*) dir_prefix="${file%/*}/" ;;
  esac

  if [ "$PROFILE_EXTENSION" = ".ts" ]; then
    emitter="_emit_ts_imports_in"
  fi

  "$emitter" < "$raw_file" \
    | while IFS= read -r imp; do
        [ -z "$imp" ] && continue
        printf '%s\n' "${dir_prefix}${imp#./}"
      done
}
# — strip_strings_and_comments ------------------------------------
# Prints the contents of <file> with JS/TS string literals (', ", `)
# and comments (// and /* */) blanked out to spaces. Newlines inside
# block comments and strings are kept so line numbers stay aligned.
# Mirrors stripStringsAndComments in src/a31_sama_v2.ts.
#
# Arguments: $1 - path of the file to read
# Globals:   PROFILE_EXTENSION — for anything other than ".ts" the
#            file is printed unchanged via cat.
# Outputs:   the masked text on stdout, written with printf "%s"
#            (no newline is appended).
#
# How the awk pass treats each construct:
#   //  comment : the two slashes and every character up to (not
#                 including) the next newline become spaces.
#   /* */       : delimiters and body become spaces, newlines kept.
#   string      : opening quote, body and closing quote become
#                 spaces, newlines kept; a backslash plus the
#                 character after it become two spaces.
# NOTE(review): because an escape is always written as two spaces, a
# backslash immediately followed by a newline inside a string does
# not keep that newline — confirm this matches the TS reference.
strip_strings_and_comments_file() {
local file="$1"
# Non-TS profiles have no masking rules: pass the file through.
if [ "$PROFILE_EXTENSION" != ".ts" ]; then
cat "$file"
return 0
fi
# RS is set to a byte that should not appear in source text so that
# the whole file arrives as a single record.
awk '
BEGIN { RS = "\x01" } # never occurs — read whole file as one record
{
src = $0
n = length(src)
out = ""
i = 1
while (i <= n) {
c = substr(src, i, 1)
d = (i < n) ? substr(src, i + 1, 1) : ""
if (c == "/" && d == "/") {
out = out " "
i += 2
while (i <= n && substr(src, i, 1) != "\n") { out = out " "; i++ }
} else if (c == "/" && d == "*") {
out = out " "
i += 2
while (i < n && !(substr(src, i, 1) == "*" && substr(src, i + 1, 1) == "/")) {
ch = substr(src, i, 1)
out = (ch == "\n") ? out "\n" : out " "
i++
}
out = out " "
i += 2
} else if (c == "\"" || c == "\x27" || c == "`") {
q = c
out = out " "
i++
while (i <= n && substr(src, i, 1) != q) {
if (substr(src, i, 1) == "\\" && i + 1 <= n) {
out = out " "
i += 2
continue
}
ch = substr(src, i, 1)
out = (ch == "\n") ? out "\n" : out " "
i++
}
out = out " "
i++
} else {
out = out c
i++
}
}
printf "%s", out
}
' "$file"
}
# — count_lines ---------------------------------------------------
# Prints (number of "\n" characters in <file>) + 1, which is what
# TS content.split("\n").length returns. `wc -l` reports the number
# of newline characters, so one is added to get the split length
# (an empty file counts as 1 line, "a\nb\n" as 3).
# Arguments: $1 - path of the file to measure
count_lines() {
  local file="$1"
  local newline_total
  newline_total="$(wc -l < "$file")"
  echo $((newline_total + 1))
}
# — is_barrel -----------------------------------------------------
# Decides whether <file> is a barrel re-export file: after masking
# strings and comments it has at least two non-empty lines, and
# every one of them has the shape `export * ... from` or
# `export { ... from`.
# Arguments: $1 - path of the file to inspect
# Globals:   PROFILE_EXTENSION — only ".ts" profiles can have barrels
# Returns:   0 for a barrel file, 1 otherwise
is_barrel() {
  local file="$1"
  local stripped trimmed total exports

  if [ "$PROFILE_EXTENSION" != ".ts" ]; then
    return 1
  fi

  stripped="$(strip_strings_and_comments_file "$file")"
  # Trim both ends of every line and keep only the non-empty ones.
  trimmed="$(sed 's/^[[:space:]]*//;s/[[:space:]]*$//' <<< "$stripped" | grep -E '.')"

  total="$(grep -cE '.' <<< "$trimmed")"
  [ "$total" -ge 2 ] || return 1

  exports="$(grep -cE '^export[[:space:]]+(\*|\{).*\bfrom\b' <<< "$trimmed")"
  if [ "$exports" -eq "$total" ]; then
    return 0
  fi
  return 1
}
# — parse_boundary_matches ----------------------------------------
# Reports which parse-boundary patterns occur in the masked (strings
# and comments blanked) contents of <file>.
# Arguments: $1 - path of the file to inspect
# Globals:   PROFILE_EXTENSION — anything but ".ts" prints nothing;
#            PARSE_BOUNDARY_PATTERN_TS_JSON_PARSE and
#            PARSE_BOUNDARY_PATTERN_TS_NEW_URL — extended regexes,
#            defined outside this function
# Outputs:   "JSON.parse" and/or "new URL", one per line, always in
#            that order
parse_boundary_matches() {
  local file="$1"
  local stripped label pattern

  if [ "$PROFILE_EXTENSION" != ".ts" ]; then
    return 0
  fi

  stripped="$(strip_strings_and_comments_file "$file")"
  for label in "JSON.parse" "new URL"; do
    if [ "$label" = "JSON.parse" ]; then
      pattern="$PARSE_BOUNDARY_PATTERN_TS_JSON_PARSE"
    else
      pattern="$PARSE_BOUNDARY_PATTERN_TS_NEW_URL"
    fi
    if grep -qE "$pattern" <<< "$stripped"; then
      echo "$label"
    fi
  done
}
# — output helpers ------------------------------------------------
# Colour switch: "1" lets _c emit escape sequences, anything else
# silences it. On by default.
SAMA_COLOR_ENABLED=1

# Turns colour output off for the rest of the run.
sama_color_disable() {
  SAMA_COLOR_ENABLED=0
}

# Writes <sequence> to stdout, without a trailing newline, only when
# colour is enabled.
# Arguments: $1 - the escape sequence (or any text) to emit
# Returns:   always 0, so it is safe as the last command of a caller
_c() {
  case "$SAMA_COLOR_ENABLED" in
    1) printf "%s" "$1" ;;
  esac
  return 0
}
# Prints a blank line followed by a bold rule carrying <title>.
# Arguments: $1 - section title
# Globals:   COLOR_BOLD, COLOR_RESET (emitted through _c, so they
#            are suppressed when colour is disabled)
print_section_header() {
  local title="$1"
  printf '\n'
  _c "$COLOR_BOLD"
  printf '%s\n' "── ${title} ──────────────────────────────────────"
  _c "$COLOR_RESET"
}
# Prints the one-line verdict of a check: green with the pass glyph
# when <violations> equals 0, red with the fail glyph otherwise.
# Arguments: $1 - check id, $2 - check name,
#            $3 - number of items examined, $4 - number of violations
# Globals:   COLOR_GREEN, COLOR_RED, COLOR_RESET (through _c),
#            GLYPH_PASS, GLYPH_FAIL
print_check_verdict() {
  local id="$1" name="$2" examined="$3" violations="$4"
  local tint="$COLOR_RED"
  local passed=0

  # Anything that does not compare equal to 0 counts as a failure.
  if [ "$violations" -eq 0 ]; then
    tint="$COLOR_GREEN"
    passed=1
  fi

  _c "$tint"
  case "$passed" in
    1)
      printf " %s Check %s: %s — %d examined, 0 violations\n" "$GLYPH_PASS" "$id" "$name" "$examined"
      ;;
    *)
      printf " %s Check %s: %s — %d examined, %d violations\n" "$GLYPH_FAIL" "$id" "$name" "$examined" "$violations"
      ;;
  esac
  _c "$COLOR_RESET"
}
# Prints one dimmed violation line made of two fields.
# Arguments: $1 - first field, $2 - second field
# Globals:   COLOR_DIM, COLOR_RESET (through _c)
print_violation() {
  local first_field="$1"
  local second_field="$2"
  _c "$COLOR_DIM"
  printf " %s %s\n" "$first_field" "$second_field"
  _c "$COLOR_RESET"
}