# syntaxai/tdd.md · main · tools / sama-cli / src / b32_utils.sh

# b32_utils.sh 478 lines · 14429 bytes raw
# b32 — logic: helpers shared across the seven §4 checks. Pure
# functions: profile parsing, file walking, declared-layer lookup,
# relative-import collection, string/comment masking, line counting,
# and output helpers. Sourced by b32_checks.sh and d21_main.sh.
# Never reads files outside the explicitly-passed paths.

# Profile state — populated by parse_profile, which resets every
# variable below on each call.
#
# Scalars:
#   PROFILE_NAME          value of the top-level `profile` key
#   PROFILE_SAMA_VERSION  value of the top-level `sama_version` key
#   PROFILE_EXTENSION     value of the top-level `extension` key;
#                         falls back to ".ts" when the key is absent
#                         or empty
#
# Parallel arrays (entry i of each array describes the same prefix):
#   PROFILE_LAYERS[i]            = "0" | "1" | "2" | "3"
#   PROFILE_PREFIXES[i]          = "a31_" etc.
#   PROFILE_SUBLAYER_NAMES[i]    = "default" | "logic" etc.
#   PROFILE_SUBLAYER_INDEXES[i]  = "0" | "1" ...
PROFILE_NAME=""
PROFILE_SAMA_VERSION=""
PROFILE_EXTENSION=".ts"
PROFILE_LAYERS=()
PROFILE_PREFIXES=()
PROFILE_SUBLAYER_NAMES=()
PROFILE_SUBLAYER_INDEXES=()

# — parse_profile -------------------------------------------------
# Loads the profile at <path> ($1) into the PROFILE_* globals.
#   * PROFILE_NAME / PROFILE_SAMA_VERSION come from the top-level
#     `profile` / `sama_version` keys.
#   * PROFILE_EXTENSION comes from `extension`, or ".ts" when that
#     key yields nothing.
#   * The four parallel arrays are emptied and then refilled by
#     visiting [layers.0] .. [layers.3] in ascending order.
parse_profile() {
  local profile_path="$1"
  local declared_ext layer_no

  PROFILE_NAME="$(_extract_top_scalar "$profile_path" profile)"
  PROFILE_SAMA_VERSION="$(_extract_top_scalar "$profile_path" sama_version)"
  declared_ext="$(_extract_top_scalar "$profile_path" extension)"
  # An empty result means "not declared": use the default extension.
  PROFILE_EXTENSION="${declared_ext:-.ts}"

  # Start from a clean slate so repeated calls never accumulate.
  PROFILE_LAYERS=()
  PROFILE_PREFIXES=()
  PROFILE_SUBLAYER_NAMES=()
  PROFILE_SUBLAYER_INDEXES=()

  for layer_no in 0 1 2 3; do
    _parse_layer_section "$profile_path" "$layer_no"
  done
}

# Extract a scalar key=value pair from the top-of-file section
# (before any [section] header) of <file> ($1) and print the value
# of the first line whose key equals <key> ($2). Prints nothing when
# the key is not present in that section.
#
# Value handling:
#   * A value that opens with " or ' and has a matching closing
#     quote followed only by whitespace / a # comment is returned
#     verbatim between the quotes, so a "#" INSIDE the quotes is
#     kept (e.g. label = "issue #42"  # note  ->  issue #42).
#   * Anything else: everything from the first "#" is dropped, the
#     rest is trimmed, and one pair of surrounding quotes is removed
#     if both ends carry the same quote character.
_extract_top_scalar() {
  local file="$1"
  local key="$2"
  awk -v key="$key" '
    # Remove leading and trailing whitespace from s.
    function trim(s) {
      sub(/^[[:space:]]+/, "", s)
      sub(/[[:space:]]+$/, "", s)
      return s
    }
    # The first section header ends the top-of-file scalars.
    /^[[:space:]]*\[/ { exit }
    {
      line = trim($0)
      if (line == "" || substr(line, 1, 1) == "#") next
      eq = index(line, "=")
      if (eq == 0) next
      # A "#" before the "=" means the "=" belongs to a comment.
      hash = index(line, "#")
      if (hash > 0 && hash < eq) next
      k = trim(substr(line, 1, eq - 1))
      if (k != key) next
      v = trim(substr(line, eq + 1))

      # Quoted value: honour the quotes before looking for comments.
      # "\047" is the octal escape for a single quote; octal escapes
      # are POSIX awk, hex ("\x27") escapes are not.
      q = substr(v, 1, 1)
      if (q == "\"" || q == "\047") {
        rest = substr(v, 2)
        close_at = index(rest, q)
        if ((close_at > 0) && (substr(rest, close_at + 1) ~ /^[[:space:]]*(#.*)?$/)) {
          print substr(rest, 1, close_at - 1)
          exit
        }
      }

      # Bare or irregular value: comment-strip, trim, unquote.
      sub(/#.*$/, "", v)
      v = trim(v)
      first = substr(v, 1, 1)
      last = substr(v, length(v), 1)
      if ((first == "\"" && last == "\"") || (first == "\047" && last == "\047")) {
        v = substr(v, 2, length(v) - 2)
      }
      print v
      exit
    }
  ' "$file"
}

# Parse one [layers.N] section of <file> ($1) for layer <N> ($2) and
# append its entries to the PROFILE_* parallel arrays. Does nothing
# (status 0) when the section is missing or empty.
#
# Two mutually exclusive layouts are understood; `prefixes` wins if
# both appear:
#   prefixes  = ["a", "b"]              single-line array; every item
#                                       gets sublayer name "default"
#   sublayers = [ { name = "..", prefix = ".." }, ... ]
#                                       on one line or spread over
#                                       several; several inline
#                                       tables may share a line
# In both layouts the sublayer index counts accepted entries from 0.
# A trailing "# comment" after the closing "]" is ignored, and lines
# inside a multi-line sublayers block that start with "#" are skipped.
_parse_layer_section() {
  local file="$1"
  local layer="$2"

  # Collect the lines between the [layers.N] header and the next
  # section header (any line starting with "[").
  local body
  body="$(awk -v target="layers.$layer" '
    /^[[:space:]]*\[/ {
      sec = $0
      sub(/^[[:space:]]*\[/, "", sec)
      sub(/\].*$/, "", sec)
      gsub(/[[:space:]]/, "", sec)
      in_target = (sec == target) ? 1 : 0
      next
    }
    in_target == 1 { print }
  ' "$file")"
  [ -z "$body" ] && return 0

  local idx=0

  # --- prefixes = ["a", "b"] ------------------------------------
  local prefixes_line
  prefixes_line="$(awk '/^[[:space:]]*prefixes[[:space:]]*=/ { print; exit }' <<< "$body")"
  if [ -n "$prefixes_line" ]; then
    local raw="${prefixes_line#*=}"
    raw="${raw%%\]*}"                       # drop "]" and whatever follows it
    raw="${raw#"${raw%%[![:space:]]*}"}"    # left-trim
    raw="${raw#\[}"
    local dq='"' sq="'"
    local -a items=()
    # read -a splits on commas without subjecting items to globbing.
    IFS=',' read -r -a items <<< "$raw"
    [ "${#items[@]}" -gt 0 ] || return 0
    local p
    for p in "${items[@]}"; do
      p="${p//[[:space:]]/}"
      p="${p//$dq/}"
      p="${p//$sq/}"
      [ -z "$p" ] && continue
      PROFILE_LAYERS+=("$layer")
      PROFILE_PREFIXES+=("$p")
      PROFILE_SUBLAYER_NAMES+=("default")
      PROFILE_SUBLAYER_INDEXES+=("$idx")
      idx=$((idx + 1))
    done
    return 0
  fi

  # --- sublayers = [ { ... }, { ... } ] -------------------------
  # Emit the text between "[" and the first following "]", broken
  # after every "}" so each output line holds at most one table.
  local sublayers_block
  sublayers_block="$(awk '
    function emit(text) {
      gsub(/[}]/, "}\n", text)
      print text
    }
    /^[[:space:]]*#/ { next }
    /^[[:space:]]*sublayers[[:space:]]*=[[:space:]]*\[/ {
      rest = $0
      sub(/^[^[]*\[/, "", rest)
      stop = index(rest, "]")
      if (stop > 0) {
        emit(substr(rest, 1, stop - 1))
        in_block = 0
      } else {
        emit(rest)
        in_block = 1
      }
      next
    }
    in_block == 1 {
      stop = index($0, "]")
      if (stop > 0) {
        emit(substr($0, 1, stop - 1))
        in_block = 0
      } else {
        emit($0)
      }
    }
  ' <<< "$body")"
  [ -z "$sublayers_block" ] && return 0

  # Keys must not be the tail of a longer identifier, so that e.g.
  # `display_name = "x"` is not mistaken for `name = "x"`.
  local name_re='(^|[^A-Za-z0-9_-])name[[:space:]]*=[[:space:]]*"([^"]*)"'
  local prefix_re='(^|[^A-Za-z0-9_-])prefix[[:space:]]*=[[:space:]]*"([^"]*)"'
  local entry name prefix
  while IFS= read -r entry; do
    name=""
    prefix=""
    if [[ "$entry" =~ $name_re ]]; then name="${BASH_REMATCH[2]}"; fi
    if [[ "$entry" =~ $prefix_re ]]; then prefix="${BASH_REMATCH[2]}"; fi
    # An entry counts only when both fields are present and non-empty.
    if [ -n "$name" ] && [ -n "$prefix" ]; then
      PROFILE_LAYERS+=("$layer")
      PROFILE_PREFIXES+=("$prefix")
      PROFILE_SUBLAYER_NAMES+=("$name")
      PROFILE_SUBLAYER_INDEXES+=("$idx")
      idx=$((idx + 1))
    fi
  done <<< "$sublayers_block"
  return 0
}

# — declared_layer ------------------------------------------------
# Looks up which profile entry the file at <path> ($1) belongs to,
# judged by its basename only. Profile entries are tried in array
# order and the first one whose prefix starts the basename wins;
# its data is printed as
#   "<layer> <sublayer_name> <sublayer_index> <prefix>"
# and the function returns 0. Returns 1 (printing nothing) when no
# prefix matches.
declared_layer() {
  local path="$1"
  local filename="${path##*/}"
  local slot candidate
  for slot in "${!PROFILE_PREFIXES[@]}"; do
    candidate="${PROFILE_PREFIXES[$slot]}"
    # The prefix is quoted so it is compared literally, not as a glob.
    [[ "$filename" == "$candidate"* ]] || continue
    printf '%s %s %s %s\n' \
      "${PROFILE_LAYERS[$slot]}" \
      "${PROFILE_SUBLAYER_NAMES[$slot]}" \
      "${PROFILE_SUBLAYER_INDEXES[$slot]}" \
      "$candidate"
    return 0
  done
  return 1
}

# — all_prefix_matches --------------------------------------------
# Prints one "<layer> <prefix>" line for EVERY profile prefix that
# the basename of <path> ($1) starts with, in profile order. Prints
# nothing when there is no match.
all_prefix_matches() {
  local path="$1"
  local filename="${path##*/}"
  local slot candidate
  for slot in "${!PROFILE_PREFIXES[@]}"; do
    candidate="${PROFILE_PREFIXES[$slot]}"
    # Quoted prefix: literal comparison, no glob interpretation.
    if [[ "$filename" == "$candidate"* ]]; then
      printf '%s %s\n' "${PROFILE_LAYERS[$slot]}" "$candidate"
    fi
  done
}

# — file classifiers ----------------------------------------------
# is_sama_file <path>: succeeds (0) when <path> ends with the
# profile extension but NOT with ".test<extension>"; fails (1)
# otherwise.
is_sama_file() {
  local path="$1"
  local ext="$PROFILE_EXTENSION"
  # Test files carry the extension too, so rule them out first.
  if [[ "$path" == *".test${ext}" ]]; then
    return 1
  fi
  [[ "$path" == *"${ext}" ]]
}

# is_test_file <path>: succeeds (0) when <path> ends with
# ".test<profile extension>", fails (1) otherwise.
is_test_file() {
  local path="$1"
  [[ "$path" == *".test${PROFILE_EXTENSION}" ]]
}

# — list_repo_files -----------------------------------------------
# Lists the regular files directly inside <src_dir> ($2) — no
# recursion — whose names end with the profile extension, one
# repo-relative path per line.
#   $1  repo_root  leading "<repo_root>/" is removed from each path;
#                  it is matched literally, so roots containing
#                  glob characters ([, *, ?) are handled
#   $2  src_dir    directory to scan
# Output is sorted in byte order (LC_ALL=C) so the listing does not
# change with the caller's locale. find's diagnostics are discarded
# on purpose: an unreadable or missing <src_dir> yields no output.
list_repo_files() {
  local repo_root="$1"
  local src_dir="$2"
  local ext="$PROFILE_EXTENSION"
  local path
  find "$src_dir" -mindepth 1 -maxdepth 1 -type f -name "*${ext}" 2>/dev/null \
    | LC_ALL=C sort \
    | while IFS= read -r path; do
        # Inner quotes keep repo_root from being read as a pattern.
        printf '%s\n' "${path#"${repo_root}"/}"
      done
}

# — collect_imports_ts --------------------------------------------
# Reads JS/TS source on stdin and prints, one per line and exactly
# as written, the path of every `from "./xxx.ts"` or
# `from './xxx.ts'` import whose `from` keyword is NOT inside a
# string literal or comment. Only paths that start with "./" and
# end with ".ts" are printed.
#
# Implemented as a single awk pass that tracks string + comment
# state character-by-character. Block-comment and string state
# carry over from one line to the next; line-comment state is reset
# at every line end. Mirrors collectRelativeImports +
# stripStringsAndComments in src/a31_sama_v2.ts.
_emit_ts_imports_in() {
  awk '
    {
      line = $0
      len = length(line)
      idx = 1
      while (idx <= len) {
        c = substr(line, idx, 1)
        d = (idx < len) ? substr(line, idx + 1, 1) : ""

        # Inside a // comment: ignore the rest of the line.
        if (in_comment_line) {
          idx++
          continue
        }
        # Inside a block comment: only the closing marker matters.
        if (in_comment_block) {
          if (c == "*" && d == "/") { in_comment_block = 0; idx += 2; continue }
          idx++
          continue
        }
        # Inside a string: skip escaped characters, stop at the
        # quote character that opened it.
        if (in_string != "") {
          if (c == "\\") { idx += 2; continue }
          if (c == in_string) in_string = ""
          idx++
          continue
        }
        if (c == "/" && d == "/") { in_comment_line = 1; idx += 2; continue }
        if (c == "/" && d == "*") { in_comment_block = 1; idx += 2; continue }
        # "\047" is a single quote (octal escapes are POSIX awk,
        # hex escapes are not).
        if (c == "\"" || c == "\047" || c == "`") { in_string = c; idx++; continue }

        # Look for `from` keyword (with word-boundary before)
        if (c == "f" && substr(line, idx, 4) == "from") {
          prev = (idx == 1) ? "" : substr(line, idx - 1, 1)
          if (prev !~ /[A-Za-z0-9_$]/) {
            j = idx + 4
            while (j <= len && substr(line, j, 1) ~ /[ \t]/) j++
            if (j <= len) {
              q = substr(line, j, 1)
              if (q == "\"" || q == "\047") {
                k = j + 1
                start = k
                # The specifier must close on the same line.
                while (k <= len && substr(line, k, 1) != q) k++
                if (k <= len) {
                  path = substr(line, start, k - start)
                  if (substr(path, 1, 2) == "./" && substr(path, length(path) - 2) == ".ts") {
                    print path
                  }
                  # Resume scanning after the closing quote.
                  idx = k + 1
                  continue
                }
              }
            }
          }
        }
        idx++
      }
      in_comment_line = 0
    }
  '
}

# — collect_imports_sh --------------------------------------------
# Reads shell source on stdin and prints the target of every
# `# sama-import: xxx.sh` annotation, one per line. A line counts
# only if it is a comment line (optional indentation, then "#") and
# the annotated name is made of [a-zA-Z0-9_.-] characters followed
# by ".sh"; the printed value runs up to the first whitespace after
# the name.
#
# Real `source` / `.` statements are deliberately NOT parsed: shell
# sourcing is too heterogeneous (paths via ${VAR}, $BASH_SOURCE,
# parameter expansion, etc.), so the sub-project annotates each
# import with an explicit comment that the verifier treats as
# ground truth. The actual invocations are expected to live below
# those comments.
_emit_sh_imports_in() {
  awk '
    # Skip everything that is not a well-formed annotation line.
    $0 !~ /^[[:space:]]*#[[:space:]]*sama-import:[[:space:]]*[a-zA-Z0-9_.-]+\.sh/ { next }
    {
      target = $0
      sub(/.*sama-import:[[:space:]]*/, "", target)
      # Cut at the first whitespace, if any.
      if (match(target, /[[:space:]]/)) {
        target = substr(target, 1, RSTART - 1)
      }
      if (length(target) > 0) print target
    }
  '
}

# collect_imports <file> <repo_root>
# Prints one repo-relative path per line for every import found in
# <repo_root>/<file>.
#   $1  file       repo-relative path of the importing file
#   $2  repo_root  directory that <file> is relative to
# The import list comes from _emit_ts_imports_in when the profile
# extension is ".ts" and from _emit_sh_imports_in otherwise. Each
# import is resolved against the directory of <file>: a leading
# "./" is dropped and the directory part of <file> is prepended.
# A <file> without any directory part (it sits in the repo root)
# resolves to the bare import name.
collect_imports() {
  local file="$1"
  local repo_root="$2"
  local raw_file="$repo_root/$file"

  # "${file%/*}" would return the file name itself when there is no
  # slash, so only derive a directory prefix when one exists.
  local dir_prefix=""
  case "$file" in
    */*) dir_prefix="${file%/*}/" ;;
  esac

  local emitter="_emit_sh_imports_in"
  if [ "$PROFILE_EXTENSION" = ".ts" ]; then
    emitter="_emit_ts_imports_in"
  fi

  local imp
  "$emitter" < "$raw_file" \
    | while IFS= read -r imp; do
        [ -z "$imp" ] && continue
        printf '%s\n' "${dir_prefix}${imp#./}"
      done
}

# — strip_strings_and_comments ------------------------------------
# Prints the contents of <file> ($1) with JS/TS string literals
# (', ", `) and comments (// and /* */) blanked out to spaces.
# Every input character produces exactly one output character and
# every newline is kept — including a newline that follows a
# backslash inside a string and the tail of an unterminated string
# or block comment — so line numbers and columns stay aligned with
# the original.
# When the profile extension is not ".ts" the file is printed
# unchanged.
# Intended to mirror stripStringsAndComments in src/a31_sama_v2.ts.
# NOTE(review): confirm the TS twin treats escaped newlines and
# unterminated constructs the same way.
strip_strings_and_comments_file() {
  local file="$1"
  if [ "$PROFILE_EXTENSION" != ".ts" ]; then
    cat "$file"
    return 0
  fi
  awk '
    # Replacement for one masked character: newlines survive.
    function blank(ch) { return (ch == "\n") ? "\n" : " " }
    # Byte 0x01 is assumed never to occur, so the whole file is read
    # as one record. Octal escapes are POSIX awk; hex ones are not.
    BEGIN { RS = "\001" }
    {
      src = $0
      n = length(src)
      out = ""
      i = 1
      while (i <= n) {
        c = substr(src, i, 1)
        d = (i < n) ? substr(src, i + 1, 1) : ""
        if (c == "/" && d == "/") {
          # Line comment: blank up to, not including, the newline.
          out = out "  "
          i += 2
          while (i <= n && substr(src, i, 1) != "\n") { out = out " "; i++ }
        } else if (c == "/" && d == "*") {
          # Block comment: blank through the closing marker, or to
          # end of input when it is never closed.
          out = out "  "
          i += 2
          while (i <= n && !(substr(src, i, 1) == "*" && substr(src, i + 1, 1) == "/")) {
            out = out blank(substr(src, i, 1))
            i++
          }
          if (i <= n) { out = out "  "; i += 2 }
        } else if (c == "\"" || c == "\047" || c == "`") {
          # String literal: blank through the matching quote.
          q = c
          out = out " "
          i++
          while (i <= n && substr(src, i, 1) != q) {
            ch = substr(src, i, 1)
            if (ch == "\\" && i + 1 <= n) {
              # Escaped character: never treated as the closing
              # quote, but an escaped newline is still a newline.
              out = out " " blank(substr(src, i + 1, 1))
              i += 2
              continue
            }
            out = out blank(ch)
            i++
          }
          # Blank the closing quote only if there is one.
          if (i <= n) { out = out " "; i++ }
        } else {
          out = out c
          i++
        }
      }
      printf "%s", out
    }
  ' "$file"
}

# — count_lines ---------------------------------------------------
# Prints (number of "\n" characters in <file>) + 1, which is what
# TS content.split("\n").length yields for the same content.
# wc -l reports the newline count itself, hence the + 1. An
# unreadable file is counted as having zero newlines.
count_lines() {
  local file="$1"
  local newlines
  newlines="$(wc -l < "$file")" || newlines=0
  printf '%d\n' "$((newlines + 1))"
}

# — is_barrel -----------------------------------------------------
# Succeeds (0) when <file> ($1) is a barrel re-export file, i.e.
# after masking strings/comments and trimming each line:
#   * at least two non-empty lines remain, and
#   * every one of them looks like `export * ... from` or
#     `export { ... from`.
# Fails (1) otherwise, and always fails for profiles whose
# extension is not ".ts".
is_barrel() {
  local file="$1"
  if [ "$PROFILE_EXTENSION" != ".ts" ]; then
    return 1
  fi

  # Masked source reduced to its trimmed, non-empty lines.
  local masked trimmed
  masked="$(strip_strings_and_comments_file "$file")"
  trimmed="$(echo "$masked" \
    | sed 's/^[[:space:]]*//;s/[[:space:]]*$//' \
    | grep -E '.')"

  local total
  total="$(printf '%s\n' "$trimmed" | grep -cE '.')"
  if [ "$total" -lt 2 ]; then
    return 1
  fi

  local reexports
  reexports="$(printf '%s\n' "$trimmed" \
    | grep -cE '^export[[:space:]]+(\*|\{).*\bfrom\b')"
  if [ "$reexports" -eq "$total" ]; then
    return 0
  fi
  return 1
}

# — parse_boundary_matches ----------------------------------------
# Prints the name of each parse-boundary pattern found in the
# masked (strings and comments blanked) contents of <file> ($1):
# "JSON.parse" first, then "new URL", one per line, each at most
# once. The regular expressions are taken from
# PARSE_BOUNDARY_PATTERN_TS_JSON_PARSE and
# PARSE_BOUNDARY_PATTERN_TS_NEW_URL, which are defined outside this
# function. Prints nothing for profiles whose extension is not
# ".ts".
parse_boundary_matches() {
  local file="$1"
  if [ "$PROFILE_EXTENSION" != ".ts" ]; then
    return 0
  fi

  local masked
  masked="$(strip_strings_and_comments_file "$file")"

  local label pattern
  for label in "JSON.parse" "new URL"; do
    case "$label" in
      "JSON.parse") pattern="$PARSE_BOUNDARY_PATTERN_TS_JSON_PARSE" ;;
      *)            pattern="$PARSE_BOUNDARY_PATTERN_TS_NEW_URL" ;;
    esac
    if echo "$masked" | grep -qE "$pattern"; then
      echo "$label"
    fi
  done
}

# — output helpers ------------------------------------------------
# SAMA_COLOR_ENABLED is the colour switch: "1" (the initial value)
# means colour sequences are written, anything else suppresses them.
SAMA_COLOR_ENABLED=1

# Turns colour output off for all later _c calls.
sama_color_disable() {
  SAMA_COLOR_ENABLED=0
}

# _c <sequence>: writes <sequence> verbatim, without a trailing
# newline, but only while colour is enabled. Always returns 0.
_c() {
  case "$SAMA_COLOR_ENABLED" in
    1) printf "%s" "$1" ;;
  esac
  return 0
}

# print_section_header <title>: writes an empty line followed by a
# ruled heading line containing <title>. The heading is wrapped in
# COLOR_BOLD / COLOR_RESET (emitted through _c, so only while
# colour is enabled).
print_section_header() {
  local title="$1"
  printf '\n'
  _c "$COLOR_BOLD"
  printf '%s\n' "── ${title} ──────────────────────────────────────"
  _c "$COLOR_RESET"
}

# print_check_verdict <id> <name> <examined> <violations>
# Writes the one-line result of a check. With zero violations the
# line uses COLOR_GREEN and GLYPH_PASS; otherwise COLOR_RED and
# GLYPH_FAIL together with the violation count. COLOR_RESET is
# emitted after the line. Colours go through _c.
print_check_verdict() {
  local id="$1" name="$2" examined="$3" violations="$4"

  # Failure path first; anything that is not a clean zero lands here.
  if ! [ "$violations" -eq 0 ]; then
    _c "$COLOR_RED"
    printf "  %s Check %s: %s — %d examined, %d violations\n" "$GLYPH_FAIL" "$id" "$name" "$examined" "$violations"
    _c "$COLOR_RESET"
    return
  fi

  _c "$COLOR_GREEN"
  printf "  %s Check %s: %s — %d examined, 0 violations\n" "$GLYPH_PASS" "$id" "$name" "$examined"
  _c "$COLOR_RESET"
}

# print_violation <first> <second>: writes one indented detail line
# holding the two given fields separated by two spaces, wrapped in
# COLOR_DIM / COLOR_RESET (emitted through _c).
print_violation() {
  local lead="$1" detail="$2"
  _c "$COLOR_DIM"
  printf "      %s  %s\n" "$lead" "$detail"
  _c "$COLOR_RESET"
}