Add Git object sizes script #1

Merged
jakob.scheid merged 1 commits from feature/git-repo-object-sizes into main 2026-05-07 16:50:42 +02:00
+84
View File
@@ -0,0 +1,84 @@
#!/usr/bin/env bash
# Shows the largest objects (blobs) of a Git repository:
# - Size
# - Object ID
# - Path
# - Branch(es) containing the object
#
# Usage:
# ./git-biggest-objects.sh [NUMBER]
#
# Example:
# ./git-biggest-objects.sh 30
# Strict mode: abort on a failing command, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -euo pipefail

# Number of objects to report: first positional argument, defaulting to 20.
COUNT="${1:-20}"

# Accept only a non-negative integer. The value is later passed to `tail -n`;
# without this check a typo would only surface after the (potentially slow)
# object collection, and a leading "+" would change what `tail -n` does.
if ! [[ "$COUNT" =~ ^[0-9]+$ ]]; then
  printf 'Usage: %s [NUMBER]\n' "${0##*/}" >&2
  exit 2
fi

# `git rev-parse --git-dir` fails when not run inside a repository.
if ! git rev-parse --git-dir >/dev/null 2>&1; then
  # Diagnostics belong on stderr so stdout carries only the report.
  echo "Fehler: Kein Git-Repository." >&2
  exit 1
fi
# Scratch files: TMP_OBJECTS receives the object list and TMP_BRANCHES the
# branch list. Both start out empty so the cleanup handler can be registered
# before either file exists.
TMP_OBJECTS=""
TMP_BRANCHES=""

# Remove whichever scratch files have been created so far.
# Globals: TMP_OBJECTS, TMP_BRANCHES (read)
cleanup() {
  local tmp_file
  for tmp_file in "$TMP_OBJECTS" "$TMP_BRANCHES"; do
    if [[ -n "$tmp_file" ]]; then
      rm -f -- "$tmp_file"
    fi
  done
}

# Install the handler before the first mktemp: if the second mktemp fails, the
# first file is still removed on exit instead of being left behind.
trap cleanup EXIT

TMP_OBJECTS="$(mktemp)"
TMP_BRANCHES="$(mktemp)"
# Record every object reachable from any ref, one per line: the object ID,
# followed by the path for objects that have one.
echo "Collecting objects ..."
git rev-list --objects --all > "$TMP_OBJECTS"

# Record the local branches as "<tip-commit> <branch>" lines. for-each-ref
# already prints exactly one such line per branch, so its output is redirected
# as-is instead of being re-emitted word by word through a loop. Running git
# as a plain command (not inside a process substitution) also means a failure
# is no longer silently ignored under `set -e`.
echo "Collecting branches ..."
git for-each-ref \
  --format='%(objectname) %(refname:short)' \
  refs/heads > "$TMP_BRANCHES"
# Turn newline-separated branch names on stdin into one comma-separated line.
# Blank lines are dropped and duplicates removed; names come out sorted.
# Outputs: the joined list (an empty line when there are no names).
join_branch_names() {
  sort -u | awk 'NF { printf "%s%s", sep, $0; sep = "," } END { print "" }'
}

echo
printf "%-12s %-40s %-50s %s\n" "SIZE" "OBJECT ID" "BRANCH" "PATH"
printf "%-12s %-40s %-50s %s\n" "----" "---------" "------" "-----"

# TMP_OBJECTS holds "<object-id> <path>" lines. cat-file looks up type and size
# by ID and, through %(rest), echoes the remainder of each input line (the
# path) back, so the path does not have to be searched for again per object.
# The rows reach the loop in ascending order of exact byte size, which is why
# no further sort on the rounded, human-readable sizes follows the loop.
git cat-file \
  --batch-check='%(objectname) %(objecttype) %(objectsize) %(rest)' \
  < "$TMP_OBJECTS" |
  awk '$2 == "blob"' |
  sort -k3,3n |
  tail -n "$COUNT" |
  while read -r object _ size path; do
    # Commits (on any ref) whose changes add or remove the object.
    # Best effort: if git fails here the branch column falls back to "?".
    commits="$(git log --all --find-object="$object" --format='%H' 2>/dev/null || true)"

    # Local branches that contain at least one of those commits. Querying
    # refs/heads via for-each-ref yields branch names only, one per line, so
    # nothing has to be split on spaces and no empty entry (which used to show
    # up as a leading comma) can enter the list.
    branches="$(
      while IFS= read -r commit; do
        [[ -n "$commit" ]] || continue
        git for-each-ref --contains "$commit" \
          --format='%(refname:short)' refs/heads 2>/dev/null || true
      done <<< "$commits" | join_branch_names
    )"

    # Human-readable size with binary prefixes (KiB, MiB, ...).
    human_size="$(numfmt --to=iec-i --suffix=B "$size")"

    printf "%-12s %-40s %-50s %s\n" \
      "$human_size" \
      "$object" \
      "${branches:-?}" \
      "$path"
  done