#!/bin/bash
# -e: exit as soon as a command fails.
# -o pipefail: a pipeline reports failure if any of its stages fails,
# not only the last one.
set -eo pipefail

echoerr() {
  # Print all arguments, joined by single spaces, as one line on stderr.
  # IFS is pinned locally so "$*" always joins with a space.
  local IFS=' '
  printf '%s\n' "$*" >&2
}

# Refuse to continue unless the invoking user is root (UID 0).
if (( UID != 0 )); then
  echoerr "You must run this script as root."
  exit 1
fi

usage() {
  # Print the help text on stdout, one line per argument, then terminate
  # the script with status 0.
  printf '%s\n' \
    " lostfile 4.13" \
    " Usage: $0 [-s] [-z] [-h]" \
    "      Calling without an option runs in relaxed mode sorting by name" \
    "  -h  display this help" \
    "  -s  optionally define strict mode" \
    "  -z  optionally sort results by size"
  exit 0
}

# Defaults; the command-line switches parsed below may replace them.
# Both variables hold the name of a command that is invoked later on.
postprocess="sort"
make_find_filter="make_relaxed_exclude_list"

while getopts 'hsz' flag; do
  case "$flag" in
    s) make_find_filter="make_strict_exclude_list" ;;
    z) postprocess="sort_by_size" ;;
    # -h and any unrecognised switch both print the help text and exit
    *) usage ;;
  esac
done
# drop the switches that were consumed so only operands remain in "$@"
shift "$((OPTIND - 1))"

# Sorts a list of file names by size, largest first.
# Input:  newline-separated paths on stdin.
# Output: the `du -s` line for each path, ordered by descending size.
#         Nothing is printed when the input is empty.
sort_by_size() {
  # -0 / tr: NUL-delimit the names so spaces and quotes in paths survive xargs.
  # -r:  without it GNU xargs still runs `du -s` once on empty input, which
  #      reports the size of the current directory instead of printing nothing.
  # -n1: one du invocation per path.
  # --:  keeps a path that begins with '-' from being parsed as a du option.
  tr '\n' '\0' | xargs -0 -r -n1 du -s -- | sort -rn -k1
}

# Converts a list of glob-like strings to a single regular expression.
# Only the * wildcard is supported. The rest of the characters are compared literally.
# Arguments: the glob-like strings, one per argument.
# Output:    the converted patterns joined with '|' on stdout, without a
#            trailing newline; nothing is printed when no argument is given.
miniglob_list_to_regex() {
  # "$*" joins the arguments with the first character of IFS; a newline puts
  # one pattern per line for sed. IFS is local, so the caller is unaffected.
  local IFS=$'\n'

  # printf instead of echo: echo would swallow a pattern such as "-e" or "-n"
  # as one of its own options.
  # sed, in order:
  #   1. wrap every character except ^ and * in a bracket expression so it
  #      is matched literally
  #   2. backslash-escape ^
  #   3. turn the * wildcard into .*
  # tr then turns the line separators into regex alternation.
  printf '%s' "$*" \
    | sed 's/[^^*]/[&]/g; s/\^/\\^/g; s/\*/.*/g' \
    | tr '\n' '|'
}

# Reads the configuration file at path $1 and appends its entries to the
# global list variables, with the leading marker character removed:
#   lines starting with +  are appended to include_list
#   lines starting with -  are appended to exclude_list
#   lines starting with $  are appended to custom_filters_list
# Lines starting with # or a space, and empty lines, pass validation and are
# ignored. A path that is not a regular file is skipped silently.
# Exits the script with status 1 when any other kind of line is present.
read_config() {
  local config_file=$1
  # scratch buffer reused for each marker; local so it does not leak
  local -a parsed_lines

  [ -f "$config_file" ] || return 0

  # any line whose first character is not one of "#", "$", " ", "+", "-"
  # makes the whole file invalid
  if grep -q '^[^#$ +-]' "$config_file"; then
    echoerr 'Invalid configuration file.'
    echoerr 'All lines in '"$config_file"' must start with #, + or -, $.'
    exit 1
  fi

  # cut -c 2- strips the one-character marker from each selected line
  readarray -t parsed_lines < <(grep '^+' "$config_file" | cut -c 2-)
  include_list+=("${parsed_lines[@]}")

  readarray -t parsed_lines < <(grep '^-' "$config_file" | cut -c 2-)
  exclude_list+=("${parsed_lines[@]}")

  readarray -t parsed_lines < <(grep '^\$' "$config_file" | cut -c 2-)
  custom_filters_list+=("${parsed_lines[@]}")
}

# Generates a find filter to implement the relaxed excluded paths list.
# relaxed mode is more forgiving about hits, and excludes files generated by various apps.
# Globals read:    include_list, exclude_list, custom_filters_list
#                  (populated here through read_config)
# Globals written: find_filter, which is appended with, in this order: the
#                  include paths, -regextype posix-extended, a pruning -regex
#                  expression built from exclude_list, and the custom filters.
make_relaxed_exclude_list() {
  local exclude_regex filter_spec
  local -a filter_words

  # read configuration files: one next to the script, one system-wide
  read_config "$(dirname "$0")/lostfiles.conf"
  read_config "/etc/lostfiles.conf"

  # the include paths go first so find receives them as its starting points
  # NOTE(review): when no configuration file supplies a "+" path, find is
  # given no starting point; GNU find then searches the current directory.
  # Confirm whether that fallback is intended.
  find_filter+=("${include_list[@]}")

  # build a regex to exclude the paths specified in the configuration files;
  # declared separately above so a failure of the substitution is not masked
  exclude_regex=$(miniglob_list_to_regex "${exclude_list[@]}")

  find_filter+=(-regextype posix-extended)
  find_filter+=(\( -not \( -regex "|$exclude_regex|" -prune \) \))

  # evaluate and add the custom filters (i.e. raw parameters to pass to find)
  for filter_spec in "${custom_filters_list[@]}"; do
    # SECURITY: eval lets a config line use shell quoting to form several
    # find arguments, but it also executes any command substitution found in
    # that line. The configuration files must be writable by trusted users only.
    eval "filter_words=($filter_spec)"
    find_filter+=("${filter_words[@]}")
  done
}

# Strict mode excludes nothing: find_filter is replaced by just the default
# Arch paths to scan.
make_strict_exclude_list() {
  find_filter=(
    /boot
    /etc
    /opt
    /srv
    /usr
    /var
  )
}

# Populate find_filter with the generator selected by the command-line switches.
"$make_find_filter"

# Paths recorded by pacman, any trailing slash stripped, unique and byte-sorted.
list_owned_paths() {
  LC_ALL=C pacman -Qlq | sed -e 's|/$||' | LC_ALL=C sort -u
}

# Paths reported by find for the current find_filter (its error output is
# discarded), unique and byte-sorted.
list_found_paths() {
  LC_ALL=C find "${find_filter[@]}" 2>/dev/null | LC_ALL=C sort -u
}

# comm -13 keeps only the lines unique to the second input, i.e. the paths
# found on disk that pacman does not list; the result is handed to the
# selected post-processing command.
LC_ALL=C comm -13 <(list_owned_paths) <(list_found_paths) | "$postprocess"

# vim:set ts=2 sw=2 et:
