#!/bin/bash

# Copyright 2008 Antonio Bonifati
# Distributed under the GNU General Public License (GPL)
#
# Optimal selection of files to burn to a CD or DVD.
# This script finds an optimal solution to the problem
# of filling up only one (1) read-only media with files,
# using the branch and bound recursive algorithm.
#
# This differs from dirsplit(1) shipped with cdrkit which assumes that
# the user wants to burn all files using multiple volumes (if necessary).
# Also bbb output is better and can be used with xargs(1).
#
# Example Usage:
#   # bbb -0fcatalog /path/to/dir/* | xargs -0 growisofs -Z /dev/dvd -v -l \
#     -dry-run -iso-level 3 -r -J -speed=2 -V MYDVD -joliet-long -graft-points
#
#   If there is no overburning, remove -dry-run to actually burn the disc.
#   When finished you can remove burned files from hard-disk with:
#
#   # bbb -0 /path/to/dir/* | xargs -0 rm -r
#
# Rerefences:
#   Algorithms and Data Structures, Niklaus Wirth, 1986, ISBN: 0-13-021999-1

# BLOCKSIZE-byte blocks are used to compute file space usage
# This value should match block size used by recorded media
# 2048 is the standard size of a data CDs and DVDs sector
export BLOCKSIZE=2048

# Default media size in blocks (multiply for BLOCKSIZE to get bytes).
# This doesn't include filesystem overhead, so don't make it too high.
# You can find this out for a media with:
#   $ dvd+rw-mediainfo /dev/dvd
SIZELIMIT=2254438

# This must be enabled before definition of setsizelimit.
# Enabling the `extglob' changes the behavior of the
# shell parser, since the interpretation of `(' is modified depending on
# context.  Since functions are parsed when they're defined, not when
# they're executed, enabling the `extglob' option inside the function
# will not affect how the function is parsed.
shopt -s extglob
setsizelimit() {
  local number=+([[:digit:]])
  case "$1" in
    ${number}GB ) SIZELIMIT=$((${1%GB}*1000000000)) ;;
    ${number}MB ) SIZELIMIT=$((${1%MB}*1000000)) ;;
    ${number}G  ) SIZELIMIT=$((${1%G}*1073741824)) ;;
    ${number}M  ) SIZELIMIT=$((${1%M}*1048576)) ;;
    ${number}   ) SIZELIMIT="$1" ;;
    *           ) echo "Invalid size limit \`$1'." >&2; exit 1 ;;
  esac
}

usage() {
  cat <<-HELP
	Usage: $PROGNAME [OPTION] FILE...
	\`Branch & Bound for Burning': optimal selection of files to burn to a media

	   -0, --null               terminate output files by a null character
	   -f, --format=FORMAT      output format; defaults to \`plain'
	   -h, --help               display this help and exit
	   -o, --output=OUTFILE     output to OUTFILE instead of stdin (-)
	   -s, --size-limit=BYTES   set media capacity; defaults roughly to a data DVD
	   -v, --version            display version and exit

	BYTES can be followed by the following multiplicative suffixes:
	MB 1000*1000, M 1024*1024, GB 1000*1000*1000, G 1024*1024*1024

	FORMAT can be: \`plain' simple file list, \`catalog'
	genisoimage(1) catalog, \`brasero' brasero(1) data project (XML)

	If FILE is a directory it is taken as a whole. 
HELP
}

# 1 = The index of file to try
# 2 = The total size of the selection s so far made
try()
{
  # The size if the selection s so far made including current file
  newsize=$(($2 + ${size[$1]}))

  # The index of the next file to try
  newindex=$(($1 + 1))

  # If inclusion is acceptable
  if [ $newsize -le $SIZELIMIT ]; then
    # Include i th file
    selection[$1]=1

    if [ $newindex -lt $n ]; then
      try $newindex $newsize
    else # check optimality
      # The still wasted size of the current selection s
      wastedsize=$(($SIZELIMIT - $newsize))

      if [ $wastedsize -lt $minwastedsize ]; then
        # New optimum, record it
        minwastedsize=$wastedsize
        optselection=("${selection[@]}")
      fi
    fi

    # Eliminate i th file
    selection[$1]=
  fi

  # If exclusion is acceptable
  wastedsize=$(($SIZELIMIT - $2))
  if [ $wastedsize -lt $minwastedsize ]; then
    if [ $newindex -lt $n ]; then
      try $newindex $2
    elif [ $wastedsize -gt 0 ]; then # check optimality
      # New optimum, record it
      minwastedsize=$wastedsize
      optselection=("${selection[@]}")
    fi
  fi
}

uriencode() {
  perl -lne '~s/([^\w\-\._'\''()])/$c=$1; $1 =~ m{[\/:]} ? sprintf("%%%2.2X",ord($c)) : sprintf("%%25%2.2X",ord($c))/eg; print'
}

xmlentities() {
  # also
  #   perl -MHTML::Entities -lne 'print encode_entities($_)'
  # could be used, but it requires module HTML::Entities to be installed
  perl -lne '%ent = ("&" => "amp", "\"" => "quot", "<" => "lt", ">" => "gt"); ~s/([&"<>])/$ent{"$1"} ? "&$ent{\"$1\"};" : ""/eg; print'
}

# Parse command options
PROGNAME=$(basename "$0")
FORMAT=plain
# TODO: this usage of getopt is GNU-specific, e.g. does not work in BSD
TEMP=$(getopt -o 0f:ho:s:v --long null,format:,help,output:,size-limit:,version \
  -n "$PROGNAME" -- "$@")
if [ $? != 0 ] ; then exit 1 ; fi
eval set -- "$TEMP"
while true; do
  case "$1" in
    -0|--null) NULL=1; shift ;;
    -f|--format)
      shopt -s nocasematch
      case "$2" in
        plain   ) FORMAT=plain ;;
        catalog ) FORMAT=catalog ;;
        brasero ) FORMAT=brasero ;;
        *       ) echo "Invalid format \`$2'." >&2; exit 1 ;;
      esac
      shopt -u nocasematch
      shift 2 ;;
    -h|--help) usage; exit ;;
    -o|--output)
      OUTFILE="$2"
      [ "$OUTFILE" = - ] && unset OUTFILE
      shift 2 ;;
    -s|--size-limit) setsizelimit "$2"; shift 2 ;;
    -v|--version) echo "$PROGNAME  Ver 1.0"; exit ;;
    --) shift; break ;;
  esac
done

# Handle remaining arguments
n=0
for arg do
  file[$n]="$arg"
  # option -B is not standard and not used. Exporting
  # variable BLOCKSIZE (see above) is preferred
  # TODO: also --apparent-size is GNU-du specific
  size[$n]=$(du -sL --apparent-size "$arg" | awk '{ print $1 }')
  [ ${size[$n]} ] || exit 1
  totsize=$(($totsize + ${size[$((n++))]}))
done
[ $n -gt 0 ] || { usage; exit 1; }

# Run main algorithm
minwastedsize=$SIZELIMIT
for (( i=0; i<n; i++ )); do
  selection[$i]=
done
try 0 0

# Output results
[ ${#optselection[@]} -eq 0 ] && { echo "No solution found" >&2; exit; }
[ $FORMAT = brasero ] && {
  cat <<EOX
<?xml version="1.0" encoding="UTF8"?>
<braseroproject>
	<version>0.2</version>
	<track>
		<data>
EOX
}
for (( i=0; i<n; i++ )); do
  if [ ${optselection[$i]} ]; then
    {
      case $FORMAT in
        plain   ) echo -n "${file[$i]}" ;;
        catalog )
          path=$(echo "${file[$i]}" | sed 's/\([\=]\)/\\\1/g')
          echo -n "/$(basename "$path")=$path"
          ;;
        brasero )
          echo -e '\t\t\t<graft>'
          echo -e "\t\t\t\t<path>/$(basename "${file[$i]}" | xmlentities)</path>"
	  echo -e "\t\t\t\t<uri>$(echo "file://$(dirname "${file[$i]}")/" | uriencode)$(echo "$(basename "${file[$i]}" | uriencode)")</uri>"
          echo -en '\t\t\t</graft>'
          ;;
      esac
      if [ "$NULL" ]; then
        echo -en \\0
      else
        echo
      fi;
    } | if [ "$OUTFILE" ]; then cat>>"$OUTFILE"; else cat; fi
  fi
done
[ $FORMAT = brasero ] && {
  cat <<EOX
		</data>
	</track>
</braseroproject>
EOX
}
# Use point as a decimal separator because bc doesn't support other locales
LC_NUMERIC=C
printf "\nWasted:\n%10u  $BLOCKSIZE-byte blocks\n%10g  MB\n%10g  M\n" \
  $minwastedsize $(echo "scale=2; $minwastedsize*$BLOCKSIZE/1000000" | bc) \
  $(echo "scale=2; $minwastedsize*$BLOCKSIZE/1048576" | bc) >&2

# End of file
