#!/bin/bash
# This is a bash script for (almost) automatic harvesting of a dataset stored at
# the GEOMAR thredds server.
#
# Dependencies: wget (install on Linux e.g. via apt-get, or on macOS e.g. via homebrew)
# wkoeve@geomar.de, v30.04.21, CC-BY-4.0 applies.
# Disclaimer: Use at your own risk.
# Subject to changes on server side structure and server behaviour.
# This script is provided for easy download of data related to the publication:
# Koeve, W., Kähler, P., Oschlies, A., 2020, Does Export Production Measure Transient Changes of the Biological Carbon Pump's
# Feedback to the Atmosphere Under Global Warming? Geophysical Research Letters, 47, e2020GL089928. https://doi.org/10.1029/2020GL089928.
# Stored at:
# https://data.geomar.de/downloads/20.500.12085/396970fe-3529-430c-a774-55ccc681795e
#
# Usage: run the script inside the (ideally empty) directory that should
# receive the dataset. wget mirrors the data into ./data.geomar.de/downloads/...;
# afterwards the content is moved into the current directory and the then
# empty mirror directories are removed.

# I adopted the following variables accordingly:
export thredds=data.geomar.de/downloads
export obj1=20.500.12085
export obj2=396970fe-3529-430c-a774-55ccc681795e

# make sure to use an appropriate search depth & search list
export sdepth=5
export slist=.f,.h,.txt,.nc,.in,.sha256,.md5,.md,.nqs,.repro,.pdf,LICENCE
#echo $sdepth
#echo $slist

#######################################
# Print a message to stderr and abort the script.
# Arguments: $* - message text
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Move every regular file found below SRC (at any depth) to the same
# relative path below DST, creating missing sub-directories on the way.
# SRC is removed afterwards, but only if every file could be moved, so a
# failed move never leads to the file being deleted.
# Arguments: $1 - source directory (a missing directory is not an error)
#            $2 - destination directory
# Outputs:   one "processing file ..." line per file on stdout
# Returns:   0 on success or if SRC does not exist, 1 if a file was left behind
#######################################
merge_tree() {
  local src=$1 dst=$2
  local f rel target failed=0

  # Nothing to do if the directory was already moved as a whole.
  [[ -d "$src" ]] || return 0

  while IFS= read -r -d '' f; do
    echo "processing file $f"
    rel=${f#"$src"/}        # path of the file relative to SRC
    target=$dst/$rel
    if ! { mkdir -p -- "${target%/*}" && mv -- "$f" "$target"; }; then
      failed=1
    fi
  done < <(find "$src" -type f -print0)

  if (( failed )); then
    printf 'warning: some files could not be moved out of %s; keeping it\n' \
      "$src" >&2
    return 1
  fi
  rm -rf -- "${src:?}"
}

command -v wget >/dev/null \
  || die "get.wget: wget not found, please install it first"

# the following may be needed before re-running get.wget
# rm -rf *
# A non-zero wget status is only reported, not fatal: a recursive retrieval
# can flag an error for a single URL although the bulk of the data arrived.
wget -nc -r -l"$sdepth" -A"$slist" -e robots=off -np "https://$thredds/$obj1/$obj2" \
  || printf 'warning: wget reported errors; continuing with what was downloaded\n' >&2

# clean up
pwd
download_dir=$thredds/$obj1/$obj2
[[ -d "$download_dir" ]] \
  || die "get.wget: nothing was downloaded to $download_dir"

# A README.md in the current directory means an earlier run already moved
# the dataset here.
FILE=README.md
if [[ -f "$FILE" ]]; then
  echo "it seems that this script has been run before"
fi

# Move the top-level content. On a re-run mv refuses to replace the already
# existing, non-empty directories repro/ and output/ (it prints an error for
# them); those are merged file by file below.
shopt -s nullglob
entries=("$download_dir"/*)
shopt -u nullglob
if (( ${#entries[@]} > 0 )); then
  mv -- "${entries[@]}" .
fi

# try whether there are files left in subdirectories output/* or repro/*
for sub in repro output; do
  merge_tree "$download_dir/$sub" "$sub"
done

# even more clean up: remove the now empty mirror directories, innermost
# first ($obj2, $obj1, downloads, data.geomar.de)
rmdir -p -- "$download_dir"
echo get.wget done