From fc1baf193f34f290f1cf046a007ec451809870a0 Mon Sep 17 00:00:00 2001
From: Jakob Scheid
Date: Thu, 7 May 2026 16:50:08 +0200
Subject: [PATCH] Add Git object sizes script

---
 git-repo-object-sizes.sh | 104 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 104 insertions(+)
 create mode 100755 git-repo-object-sizes.sh

diff --git a/git-repo-object-sizes.sh b/git-repo-object-sizes.sh
new file mode 100755
--- /dev/null
+++ b/git-repo-object-sizes.sh
@@ -0,0 +1,104 @@
+#!/usr/bin/env bash
+#
+# Lists the largest blobs reachable from any ref of a Git repository,
+# ordered by ascending size, as a table with the columns:
+#   - Size (human-readable)
+#   - Object ID
+#   - Branch(es) containing a commit that adds or removes the object
+#   - Path
+#
+# Progress and error messages go to stderr, so stdout carries only the table.
+#
+# Usage:
+#   ./git-repo-object-sizes.sh [NUMBER]
+#
+# NUMBER is how many objects to show (default: 20).
+#
+# Example:
+#   ./git-repo-object-sizes.sh 30
+
+set -euo pipefail
+
+# Prints message $1 to stderr and exits with status $2 (default: 1).
+die() {
+    printf '%s\n' "$1" >&2
+    exit "${2:-1}"
+}
+
+# Prints one table row from four arguments: size, object ID, branches, path.
+print_row() {
+    printf '%-12s %-40s %-50s %s\n' "$@"
+}
+
+# Prints "BYTES OBJECT_ID PATH" for the $1 largest blobs reachable from
+# any ref, ordered by ascending size.
+largest_blobs() {
+    local count=$1
+    # %(rest) repeats the text that followed the object ID on the input line,
+    # i.e. the path printed by rev-list, so no separate path lookup is needed.
+    git rev-list --objects --all |
+        git cat-file \
+            --batch-check='%(objecttype) %(objectsize) %(objectname) %(rest)' |
+        awk '$1 == "blob" { sub(/^blob /, ""); print }' |
+        sort -k1,1n |
+        tail -n "$count"
+}
+
+# Prints the local branches that contain a commit adding or removing object
+# $1, de-duplicated and joined with commas.
+branches_for_object() {
+    local object=$1
+    local commit
+    # --find-object selects commits that change how often the object occurs.
+    git log --all --find-object="$object" --format='%H' |
+        while IFS= read -r commit; do
+            # for-each-ref lists real refs only, so a detached HEAD never shows
+            # up as a multi-word pseudo branch name.
+            git for-each-ref --contains "$commit" \
+                --format='%(refname:short)' refs/heads
+        done |
+        sort -u |
+        paste -sd, -
+}
+
+# Prints byte count $1 in IEC units (e.g. 1.5MiB). Falls back to the plain
+# byte count when numfmt is not installed.
+human_size() {
+    local bytes=$1
+    if command -v numfmt >/dev/null 2>&1; then
+        numfmt --to=iec-i --suffix=B "$bytes"
+    else
+        printf '%sB\n' "$bytes"
+    fi
+}
+
+# Validates the optional NUMBER argument and prints the table.
+main() {
+    local count=${1:-20}
+    if ! [[ "$count" =~ ^[0-9]+$ ]]; then
+        die "Usage: ${0##*/} [NUMBER] (NUMBER must be a non-negative integer)" 2
+    fi
+
+    if ! git rev-parse --git-dir >/dev/null 2>&1; then
+        die "Fehler: Kein Git-Repository."
+    fi
+
+    printf '%s\n' "Collecting objects ..." >&2
+
+    print_row "SIZE" "OBJECT ID" "BRANCH" "PATH"
+    print_row "----" "---------" "------" "-----"
+
+    local size object path branches human
+    # A pipeline instead of process substitution, so that pipefail reports a
+    # failing git command inside largest_blobs.
+    largest_blobs "$count" |
+        while IFS=' ' read -r size object path; do
+            # Best effort: if the branch lookup fails (e.g. an older git without
+            # --find-object), show "?" instead of aborting the whole listing.
+            branches="$(branches_for_object "$object" 2>/dev/null)" || branches=""
+            human="$(human_size "$size")"
+            print_row "$human" "$object" "${branches:-?}" "$path"
+        done
+}
+
+main "$@"