#!/bin/bash

# Use the wget utility to download the contents of an AWS S3 bucket.
# Downloads are saved to a sub-directory of the current working directory named after the database, e.g. mimic3wdb.
# The bucket and its contents must be publicly accessible.
# May be safely restarted when necessary; files that already exist locally are not downloaded again.

# Print the one-line synopsis followed by the option summary on stdout.
# The program name shown is the basename of the running script ($0).
display_usage() {
	local script_name
	script_name=$(basename "$0")

	printf '%s\n\n' "$script_name [-h] [-w | -m] -- program to download the waveforms from MIMIC II WDB"
	printf 'Usage:\n'
	printf '    -h, --help\t\tShow this help text\n'
	printf '    -m, --matched \tDownload the matched waveforms\n'
	printf '    -w, --waveforms\tDownload the waveforms\n'
	printf '\n'
}

# Download the complete waveform database.
#
# Objects live in the bucket under $prefix/$database/ but are written locally
# beneath $RealDB/ (relative to the current directory), keeping the bucket's
# sub-directory layout.
#
# Globals:   none modified (all working variables are local)
# Outputs:   progress on stdout, warnings and errors on stderr
# Returns:   0 when every file was downloaded or already present;
#            1 when RECORDS, a directory listing, or any file could not be fetched
Waveforms()
{
	local bucket='physionet-pds'
	local prefix='PhysioBank'
	local database='mimic2wdb'
	local RealDB='mimic3wdb'
	local base_url="https://s3.amazonaws.com/$bucket"
	local directories directory listing files f outdir target
	local fcount skipped failed=0

	# Every non-empty line of RECORDS is used as a directory prefix below $database.
	if ! directories=$(wget --quiet -O - "$base_url/$prefix/$database/RECORDS"); then
		printf '%s\n' "ERROR: unable to fetch $base_url/$prefix/$database/RECORDS" >&2
		return 1
	fi

	# The lists are fed through descriptors 3 and 4 so that no command inside
	# the loops can swallow them via stdin.
	while IFS= read -r -u 3 directory; do
		[ -n "$directory" ] || continue
		printf '\n%s %s' 'Working with ' "$RealDB/$directory"

		# Parse the XML bucket listing returned by S3 and collect the object keys.
		# NOTE: S3 returns at most 1000 keys per listing request; marker-based
		# pagination is not implemented, so larger directories would be truncated.
		if ! listing=$(wget --quiet -O - "https://$bucket.s3.amazonaws.com/?prefix=$prefix/$database/$directory"); then
			printf '\n%s\n' "WARNING: unable to list $database/$directory" >&2
			failed=1
			continue
		fi
		files=$(printf '%s\n' "$listing" | grep -Eo '<Key>[^<]+' | cut -c6-)

		fcount=0
		skipped=0
		while IFS= read -r -u 4 f; do
			# Ignore blank lines and "folder" placeholder keys (trailing slash):
			# they name no file and would otherwise be saved in place of a directory.
			case $f in
				'' | */) continue ;;
			esac

			# e.g. PhysioBank/mimic2wdb/30/3000003/x.dat -> mimic3wdb/30/3000003
			outdir=$(dirname "${f#"$prefix"}")
			outdir=$RealDB${outdir#"/$database"}
			target=$outdir/$(basename "$f")

			# Restart support: anything already present is left untouched.
			if [ -e "$target" ]; then
				skipped=$((skipped+1))
				continue
			fi

			if ! mkdir -p "$outdir"; then
				failed=1
				continue
			fi

			# Fetch into a temporary name and rename only on success, so a failed
			# or interrupted transfer is never mistaken for a finished file later.
			if wget --quiet --output-document="$target.part" "$base_url/$f"; then
				if mv -f -- "$target.part" "$target"; then
					fcount=$((fcount+1))
				else
					failed=1
				fi
			else
				rm -f -- "$target.part"
				printf '\n%s\n' "WARNING: failed to download $f" >&2
				failed=1
			fi
		done 4<<< "$files"

		# fcount covers files fetched in this run only; existing ones are reported separately.
		printf '\n%s %d %s\n' 'FINISHED - Downloaded ' "$fcount" ' files.'
		if [ "$skipped" -gt 0 ]; then
			printf '%s %d %s\n' 'Skipped ' "$skipped" ' files already present.'
		fi
	done 3<<< "$directories"

	return "$failed"
}

# Download the matched subset of the waveform database.
#
# Objects live in the bucket under $prefix/$database/matched/ and are written
# locally beneath ./$database/matched/, keeping the bucket's sub-directory layout.
#
# Globals:   none modified (all working variables are local)
# Outputs:   progress on stdout, warnings and errors on stderr
# Returns:   0 when every file was downloaded or already present;
#            1 when RECORDS, a directory listing, or any file could not be fetched
Matched_Waveforms()
{
	local bucket='physionet-pds'
	local prefix='PhysioBank'
	local database='mimic3wdb'
	local base_url="https://s3.amazonaws.com/$bucket"
	local directories directory listing files f outdir target
	local fcount skipped failed=0

	# Every non-empty line of RECORDS is used as a directory prefix below matched/.
	if ! directories=$(wget --quiet -O - "$base_url/$prefix/$database/matched/RECORDS"); then
		printf '%s\n' "ERROR: unable to fetch $base_url/$prefix/$database/matched/RECORDS" >&2
		return 1
	fi

	# The lists are fed through descriptors 3 and 4 so that no command inside
	# the loops can swallow them via stdin.
	while IFS= read -r -u 3 directory; do
		[ -n "$directory" ] || continue
		printf '\n%s %s' 'Working with ' "$database/matched/$directory"

		# Parse the XML bucket listing returned by S3 and collect the object keys.
		# NOTE: S3 returns at most 1000 keys per listing request; marker-based
		# pagination is not implemented, so larger directories would be truncated.
		if ! listing=$(wget --quiet -O - "https://$bucket.s3.amazonaws.com/?prefix=$prefix/$database/matched/$directory"); then
			printf '\n%s\n' "WARNING: unable to list $database/matched/$directory" >&2
			failed=1
			continue
		fi
		files=$(printf '%s\n' "$listing" | grep -Eo '<Key>[^<]+' | cut -c6-)

		fcount=0
		skipped=0
		while IFS= read -r -u 4 f; do
			# Ignore blank lines and "folder" placeholder keys (trailing slash):
			# they name no file and would otherwise be saved in place of a directory.
			case $f in
				'' | */) continue ;;
			esac

			# e.g. PhysioBank/mimic3wdb/matched/p00/p000020/x.dat
			#   -> ./mimic3wdb/matched/p00/p000020
			outdir=.$(dirname "${f#"$prefix"}")
			target=$outdir/$(basename "$f")

			# Restart support: anything already present is left untouched.
			if [ -e "$target" ]; then
				skipped=$((skipped+1))
				continue
			fi

			if ! mkdir -p "$outdir"; then
				failed=1
				continue
			fi

			# Fetch into a temporary name and rename only on success, so a failed
			# or interrupted transfer is never mistaken for a finished file later.
			if wget --quiet --output-document="$target.part" "$base_url/$f"; then
				if mv -f -- "$target.part" "$target"; then
					fcount=$((fcount+1))
				else
					failed=1
				fi
			else
				rm -f -- "$target.part"
				printf '\n%s\n' "WARNING: failed to download $f" >&2
				failed=1
			fi
		done 4<<< "$files"

		# fcount covers files fetched in this run only; existing ones are reported separately.
		printf '\n%s %d %s\n' 'FINISHED - Downloaded ' "$fcount" ' files.'
		if [ "$skipped" -gt 0 ]; then
			printf '%s %d %s\n' 'Skipped ' "$skipped" ' files already present.'
		fi
	done 3<<< "$directories"

	return "$failed"
}

#######################################
# Command-line entry point: parse the options and run the requested downloads.
# Accepts the short options -h, -m, -w and the long forms --help, --matched,
# --waveforms shown by display_usage.
# Arguments: the script's command-line arguments
# Outputs:   usage text on stdout for -h/--help, on stderr for usage errors
# Returns:   0 on success or after showing help;
#            1 on an unknown option, when no download was requested,
#            or when a download function reports failure
#######################################
main() {
  local Matched=false
  local Wave=false
  local arg status=0
  local OPTIND=1 OPTARG

  # The "-:" entry makes getopts deliver "--name" as option "-" with
  # OPTARG=name, which is how the long options are recognised.
  while getopts ":hmw-:" arg; do
    case $arg in
      m) Matched=true
        ;;
      w) Wave=true
        ;;
      h) # Display help.
        display_usage
        return 0
        ;;
      -)
        case $OPTARG in
          matched) Matched=true
            ;;
          waveforms) Wave=true
            ;;
          help)
            display_usage
            return 0
            ;;
          *)
            printf 'Unknown option: --%s\n' "$OPTARG" >&2
            display_usage >&2
            return 1
            ;;
        esac
        ;;
      *)
        printf 'Unknown option: -%s\n' "$OPTARG" >&2
        display_usage >&2
        return 1
        ;;
    esac
  done

  # Nothing to do (no arguments, or none that select a download): usage error.
  if ! $Matched && ! $Wave; then
    display_usage >&2
    return 1
  fi

  if $Matched; then Matched_Waveforms || status=1; fi
  if $Wave; then Waveforms || status=1; fi

  return "$status"
}

# Last command of the script, so its status becomes the script's exit status.
main "$@"





