#! /bin/bash
#
# esm-ssp585-ocn-alk_EXP1.post
#
# Generated by Make Experiments! (mkexp) 1.1.5rc2
#
# $Id: DEFAULT.post.tmpl 2 2021-02-08 13:12:54Z m221078 $
#
# $Id: DEFAULT.config 1 2021-02-03 20:53:47Z m221078 $
# $Id$
# $Id$
# $Id: CDRSYNTRA_OUTPUT.config 9899 2019-07-01 09:57:06Z m221078 $
# $Id: levante.config $
#
###############################################################################
#
# The parameters in section TIME PARAMETERS are edited by the calling script
# just before submission.
#
###############################################################################
#
#
# Setup for levante (SLURM)
#
# $Id: levante.tmpl $
#
#SBATCH --job-name=esm-ssp585-ocn-alk_EXP1_post
#SBATCH --partition=shared
#SBATCH --tasks-per-node=8
#SBATCH --time=00:20:00
#SBATCH --output=%x_%j.log
#SBATCH --mail-type=FAIL,ARRAY_TASKS
#SBATCH --account=bm1241
#SBATCH --propagate=STACK,CORE

ulimit -s 390625 # * 1024 B = 400 MB

# OpenMPI
export HDF5_USE_FILE_LOCKING=FALSE
export MALLOC_TRIM_THRESHOLD_="-1"
export KMP_AFFINITY="granularity=fine,scatter"
export KMP_LIBRARY="turnaround"
export MKL_DEBUG_CPU_TYPE=5
export MKL_ENABLE_INSTRUCTIONS=AVX2
export OMPI_MCA_btl=self
export OMPI_MCA_coll="^ml"
export OMPI_MCA_coll_hcoll_enable="1"
export OMPI_MCA_io="romio321"
export OMPI_MCA_osc="pt2pt"
export OMPI_MCA_pml="ucx"
export HCOLL_ENABLE_MCAST_ALL="1"
export HCOLL_MAIN_IB=mlx5_0:1
export UCX_IB_ADDR_TYPE=ib_global
export UCX_NET_DEVICES=mlx5_0:1
export UCX_TLS=mm,knem,cma,dc_mlx5,dc_x,self
export UCX_UNIFIED_MODE=y
export SLURM_CPU_FREQ_REQ=HighM1

# Intel OpenMP
export KMP_STACKSIZE=128M

# OpenMP
export OMP_NUM_THREADS=1

# Workaround for SLURM bug in chained jobs:
# the job name is set explicitly, and every sbatch call made from this script
# goes through a wrapper that first unsets SLURM_MEM_PER_CPU and
# SLURM_NTASKS_PER_NODE in the current shell.
SLURM_JOB_NAME=esm-ssp585-ocn-alk_EXP1_post
sbatch () {
    unset SLURM_MEM_PER_CPU SLURM_NTASKS_PER_NODE
    command sbatch "$@"
}

#
# Debug level taken from the environment, 0 if unset or empty.
# Level >= 2 enables command tracing below.
DEBUG_LEVEL=${DEBUG_LEVEL:-0}

# Support log style output
export LANG=C
# print: echo the arguments prefixed with a 'YYYY-MM-DD HH:MM:SS:' time stamp
print () { echo "$(date +'%F %T'):" "$@"; }
# Extended regular expression matching the time stamp written by print
print_re='^[0-9]+-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}'
# warn: time-stamped message tagged 'Hey:' on standard error
warn () { print 'Hey:' "$@" >&2; }
# die: time-stamped message tagged 'Oops:' plus the caller's line number on
# standard error, then exit with status 1
die () { print 'Oops:' "$@" >&2; print Error at line ${BASH_LINENO[0]} >&2; exit 1; }

# Bail out on error (and on use of unset variables);
# the ERR trap reports the failing line first
trap 'print Error at line $LINENO >&2' ERR
set -eu

# Print command info
[[ $DEBUG_LEVEL -ge 2 ]] && set -x

###############################################################################
#
# TIME PARAMETERS
#
###############################################################################

# The right-hand sides are placeholders; see the note in the file header.
jobnum=Jobnum
startdate=Startdate
nextdate=Nextdate
inidate=Inidate
findate=Findate

###############################################################################
#
# USER INTERFACE
#
###############################################################################

#
# Setup of experiment
#

EXP_ID=esm-ssp585-ocn-alk_EXP1

#
# File systems
#

# SCRIPT_DIR: Permanent file system for the SCRIPTS on the COMPUTING HOST
#             (only needs to be specified if the tasks are NOT generated on
#             the computing host)
SCRIPT_DIR=/home/m/m300966/CDRSynTra-mpiesm-1.2.01p7/experiments/esm-ssp585-ocn-alk_EXP1/scripts

# DATA_DIR, LOG_DIR:
#             Directories of the SHORT TERM data server.
#             Model INPUT and OUTPUT will be read from/written to
#             this file system of the computing host
DATA_DIR=/work/bm1241/m300966/CDRSynTra-mpiesm-1.2.01p7/experiments/esm-ssp585-ocn-alk_EXP1/outdata
LOG_DIR=/work/bm1241/m300966/CDRSynTra-mpiesm-1.2.01p7/experiments/esm-ssp585-ocn-alk_EXP1/log

# Time control
INTERVAL_MONTHS=12
OUTPUT_INTERVAL='1 month'

###############################################################################
#
# END OF USER INTERFACE
#
###############################################################################

#
# Component directories/names
#

atmmod=echam6
srfmod=jsbach
ocemod=mpiom
bgcmod=hamocc

#
# Default options for Unix commands
#
# Each wrapper shadows the command of the same name and adds the options
# shown; 'after' runs the 'after' operator of cdo through the cdo wrapper's
# underlying command.
mkdir () { command mkdir -vp "$@"; }
rm () { command rm -vf "$@"; }
ln () { command ln -vf "$@"; }

cdo () { command cdo -O "$@"; }

after () { command cdo after "$@"; }

#
# Utilities
#

PATH=/home/m/m300966/CDRSynTra-mpiesm-1.2.01p7/util/running/functions:$PATH

# calc_date SUBCOMMAND [ARGS...]
# Calls the external calc_date command with '-c 1' inserted directly after
# the subcommand; all remaining arguments are passed through unchanged.
function calc_date {
    typeset command="$1"
    shift
    command calc_date $command -c 1 "$@"
}

# get_file_names PATTERN [VALUES...]
# Uses PATTERN as a printf format, applies it once per value, and prints the
# results as one space-separated line.
function get_file_names {
    typeset pattern="$1"
    shift
    echo $(printf " $pattern" "$@")
}

# time_merge OUT [INPUTS...]
# Concatenates INPUTS into a dot-prefixed temporary file in OUT's directory
# and renames it to OUT only if the concatenation succeeded.
function time_merge {
    typeset out="$1"
    typeset tmp=$(dirname $out)/.$(basename $out)
    shift
    cat "$@" > $tmp && mv $tmp $out
}

#
# Job specification
#

# Averaging operators are only set when OUTPUT_INTERVAL is empty;
# for any other value both stay empty.
case "$OUTPUT_INTERVAL" in
    '') mean_op='-monmean'; avg_op='-monavg';;
    *) mean_op=; avg_op=;;
esac

# Upper limit of concurrently running background jobs,
# and the polling interval (seconds) used while waiting for a free slot
max_jobs=16
sleep_time=2

# Definition of some time variables
#
# enddate: last day of this run
# prevstartdate: first day of previous run

enddate=$(calc_date minus -D1 -- $nextdate)
prevstartdate=$(calc_date minus -M12 -- ${startdate}_)
prevstartdate=$(calc_date minus -D0 -- ${prevstartdate}_)

#
# DATA PROCESSING
#

print "post-processing started for $startdate-$enddate"

# Check time range

if [[ $startdate != *01 ]]
then
    die "invalid start date; currently only first of month is supported"
fi

if [[ $nextdate != *01 ]]
then
    die "invalid next date; currently only first of month is supported"
fi

# Computation of expected input time stamps
# One stamp (date without its trailing two-digit day) is collected for every
# month from startdate up to and including enddate.
startstamp=${startdate%??}
laststamp=
stamps=
currdate=$startdate
while [[ $(later_date -- $currdate $enddate) == $enddate ]]
do
    laststamp=${currdate%??}
    stamps="$stamps $laststamp"
    currdate=$(calc_date plus -M 1 $currdate)
done

# Computation of expected years for concatenated output
# (the first year is skipped if the experiment did not start on January 1st,
# the last year is skipped if this run does not end on December 31st)
iniyear=${inidate%????}
startyear=${startdate%????}
endyear=${enddate%????}
[[ $startyear == $iniyear && $inidate != *0101 ]] && ((++startyear))
[[ $enddate != *1231 ]] && ((--endyear))

# Temporary directory
post_dir=$DATA_DIR/post_$startdate-$enddate
[[ -d $post_dir ]] &&
    warn "previous job failed or still running; removing temp dir"
rm -r $post_dir
mkdir $post_dir

# Move previous log file to log directory
# (only logs that contain the 'finished' message of the previous interval;
# '|| :' keeps a grep without matches from aborting the script)
mkdir -pv $LOG_DIR
for log_path in $(grep -El "$print_re: post-processing finished for $prevstartdate" $SCRIPT_DIR/${EXP_ID}_post*_[0-9]*.log || :)
do
    log_file=${log_path##*/}
    # The * after post might contain underscores or numbers,
    # so make sure we only take the latter part as previous job's id
    log_prefix=${log_file%_[0-9]*.log}
    log_suffix=${log_file#$log_prefix}
    mv $log_path $LOG_DIR/${EXP_ID}_post_${prevstartdate}$log_suffix
done

#
# Postprocessing of ECHAM
#

print 'ECHAM post-processing started'

outmod=${DATA_DIR}/${atmmod}
fileext=.grb

mkdir ${outmod}
cd ${outmod}

prefix=${EXP_ID}_${atmmod}

# Lists of files

meantags='BOT ATM LOG'
filetags='echam co2 tracer accw echamday'

# Remove list
atm_files_to_remove=

# Generate monthly means, possibly apply afterburner transformations
#
# Every job runs in a background subshell. 'wait' does not report their exit
# codes, so a failing job records its status in $post_dir/status, which is
# checked after all jobs have finished.

echo 0 > $post_dir/status
remove_list=

# Tags to process, fixed once before the loop. The co2/tracer branches below
# append their tag to meantags; iterating over "$meantags $filetags" directly
# would therefore list these tags twice from the second stamp on and start
# two concurrent jobs writing the same output file.
atm_tags="$meantags $filetags"

for stamp in $stamps
do
    suffix=${stamp}${fileext}
    for filetag in $atm_tags
    do
        input=${prefix}_${filetag}_${suffix}
        output=${prefix}_${filetag}_mm_${suffix}
        # If too many jobs run at the same time, wait
        while (( $(jobs -pr | wc -l) >= max_jobs )); do sleep $sleep_time; done
        case $filetag/$mean_op in
            BOT/*)
                input=${prefix}_echam_${suffix}
                BOT_1_file=${post_dir}/${prefix}_BOT_1_mm_${suffix}
                BOT_2_file=${post_dir}/${prefix}_BOT_2_mm_${suffix}
                BOT_3_file=${post_dir}/${prefix}_BOT_3_mm_${suffix}
                (
                    trap 'echo $? > $post_dir/status' ERR
                    after $input $BOT_1_file << EOF
&select
code = 64, 65, 66, 67, 83, 84, 85, 86, 87, 88, 91, 92, 93, 94, 95, 96, 97, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 119, 120, 121, 122, 123, 124, 134, 137, 139, 140, 141, 142, 143, 144, 145, 146, 147, 150, 151, 164, 165, 166, 167, 168, 169, 171, 175, 176, 177, 178, 179, 180, 181, 182, 184, 185, 186, 187, 188, 191, 192, 193, 197, 203, 204, 205, 208, 209, 210, 211, 213, 214, 216, 229, 230, 231, 233, 235, 260
type = 20
level = 1
format = 1
mean = 0
/
EOF
                    # Post-process accw stream
                    cdo -f nc2 setmisstoc,0.0 $mean_op ${prefix}_accw_${suffix} \
                        $post_dir/${prefix}_BOT_2_mm_${stamp}.nc
                    cdo -f grb copy $post_dir/${prefix}_BOT_2_mm_${stamp}.nc \
                        $BOT_2_file
                    # Generate third part of new BOT file for ECHAM:
                    # Post-process missing code 219 out of jsbach-output
                    cdo mulc,1000. -setcode,219 -selcode,218 $avg_op \
                        $DATA_DIR/$srfmod/${EXP_ID}_${srfmod}_jsbach_${suffix} $BOT_3_file
                    # Assemble final BOT file
                    cdo merge $BOT_1_file $BOT_2_file $BOT_3_file $output
                ) &
                ;;
            ATM/*)
                input=${prefix}_echam_${suffix}
                ATM_1_file=${post_dir}/${prefix}_ATM_1_mm_${suffix}
                ATM_2_file=${post_dir}/${prefix}_ATM_2_mm_${suffix}
                (
                    trap 'echo $? > $post_dir/status' ERR
                    after $input $ATM_1_file << EOF
&select
code = 130, 131, 132, 133, 135, 153, 154, 156, 157, 223
level = 100000, 92500, 85000, 77500, 70000, 60000, 50000, 40000, 30000, 25000, 20000, 15000, 10000, 7000, 5000, 3000, 2000, 1000, 700, 500, 300, 200, 100, 50, 20, 10
type = 30
format = 1
mean = 0
/
EOF
                    after $input $ATM_2_file << EOF
&select
code = 138, 148, 149, 155
level = 100000, 92500, 85000, 77500, 70000, 60000, 50000, 40000, 30000, 25000, 20000, 15000, 10000, 7000, 5000, 3000, 2000, 1000, 700, 500, 300, 200, 100, 50, 20, 10
type = 70
format = 1
mean = 0
/
EOF
                    cdo merge $ATM_1_file $ATM_2_file $output
                ) &
                ;;
            LOG/*)
                input=${prefix}_echam_${suffix}
                (
                    trap 'echo $? > $post_dir/status' ERR
                    after $input $output << EOF
&select
code = 130, 131, 132
level = 100935, 99567, 97166, 93994, 90264, 86141, 81754, 77211, 72595, 67961, 63353, 58808, 54350, 49998, 45770, 41687, 37755, 33987, 30402, 27015, 23833, 20867, 18116, 15578, 13239, 11066, 9102, 7406, 5964, 4752, 3743, 2914, 2235, 1685, 1245, 901, 637, 440, 296, 193, 122, 74, 43, 23, 11, 4, 1
type = 30
format = 1
mean = 0
/
EOF
                ) &
                ;;
            QBO/*)
                input=${prefix}_echam_${suffix}
                (
                    trap 'echo $? > $post_dir/status' ERR
                    after $input $output << EOF
&select
code = 131
level = 10000, 7000, 5000, 3000, 2000, 1000, 700, 500, 300, 200, 100
type = 30
format = 1
mean = 0
/
EOF
                ) &
                ;;
            co2/?*|tracer/?*)
                # with averaging: compute the mean into the _mm_ file
                (
                    trap 'echo $? > $post_dir/status' ERR
                    cdo $avg_op $input $output
                ) &
                # make sure the _mm_ files take part in the yearly merge
                [[ " $meantags " != *" $filetag "* ]] &&
                    meantags="$meantags $filetag"
                ;;
            co2/|tracer/)
                # w/o averaging: the _mm_ file is just a link to the input,
                # and the original name is scheduled for removal
                (
                    trap 'echo $? > $post_dir/status' ERR
                    ln $input $output
                ) &
                [[ " $meantags " != *" $filetag "* ]] &&
                    meantags="$meantags $filetag"
                remove_list="$remove_list $input"
                ;;
            */?*)
                # with averaging, by default, files are left as they are
                ;;
            *)
                # w/o averaging, by default, files are left as they are
                ;;
        esac
    done # filetags
done # stamp in $stamps
wait
# Under 'set -e' a non-zero recorded status aborts the script here
[[ $(<$post_dir/status) -eq 0 ]]

atm_files_to_remove="$atm_files_to_remove $remove_list"

print 'ECHAM post-processing finished'

#
# Concatenate monthly to yearly files
#

print 'ECHAM concatenation started'

echo 0 > $post_dir/status
remove_list=

for ((year=startyear; year<=endyear; ++year))
do
    # sed appends _mm to every mean tag (\> is the end-of-word anchor)
    for filetag in $(echo $meantags | sed 's/\>/_mm/g') $filetags
    do
        input=${prefix}_${filetag}_%s$fileext
        output=${prefix}_${filetag}_$year$fileext
        # The twelve monthly file names of this year, January to December
        inputs=$(get_file_names $input $(for month in $(seq -f %02.0f 1 12); do echo ${year}${month}; done))
        # If too many jobs run at the same time, wait
        while (( $(jobs -pr | wc -l) >= max_jobs )); do sleep $sleep_time; done
        (
            trap 'echo $? > $post_dir/status' ERR
            time_merge $output $inputs
        ) &
        remove_list="$remove_list $inputs"
    done
done
wait
[[ $(<$post_dir/status) -eq 0 ]]

atm_files_to_remove="$atm_files_to_remove $remove_list"

print 'ECHAM concatenation finished'

#
# Postprocessing of JSBACH
#

print 'JSBACH post-processing started'

outmod=${DATA_DIR}/${srfmod}
fileext=.grb

mkdir ${outmod}
cd ${outmod}

prefix=${EXP_ID}_${srfmod}

# file_name TAG STAMP: print the data file name for the current prefix
# and file extension
function file_name {
    typeset tag=$1
    typeset stamp=$2
    echo ${prefix}_${tag}_$stamp$fileext
}

# Remove list
srf_files_to_remove=

remove_list=

# One generated makefile per stamp, read by make from standard input.
# The here-document is expanded by the shell first, so make only sees plain
# file names. Recipes are given inline after ';' so that the rules do not
# depend on literal TAB characters surviving in this script.
for stamp in $stamps
do
    make -j $max_jobs -f - all << EOF
#
# [output.jsbach]
#

$(file_name veg_mm $stamp): $(file_name veg $stamp) ; ln -vf $(file_name veg $stamp) $(file_name veg_mm $stamp)

$(file_name surf_mm $stamp): $(file_name surf $stamp) ; ln -vf $(file_name surf $stamp) $(file_name surf_mm $stamp)

$(file_name yasso_mm $stamp): $(file_name yasso $stamp) ; ln -vf $(file_name yasso $stamp) $(file_name yasso_mm $stamp)

$(file_name nitro_mm $stamp): $(file_name nitro $stamp) ; ln -vf $(file_name nitro $stamp) $(file_name nitro_mm $stamp)

#
# [output.jsbach.fapar_mon]
#

$post_dir/$(file_name fapar_mon $stamp): $(file_name land $stamp) ; cdo expr,var125=var148/var149 $(file_name land $stamp) $post_dir/$(file_name fapar_mon $stamp)

#
# [output.jsbach.albedo_mon]
#

$post_dir/$(file_name albedo_mon $stamp): $(file_name jsbach $stamp) ; cdo expr,var13=var22/var21 $(file_name jsbach $stamp) $post_dir/$(file_name albedo_mon $stamp)

#
# [output.jsbach.land_mm]
#

$(file_name land_mm $stamp): $(file_name land $stamp) $post_dir/$(file_name fapar_mon $stamp) ; cdo merge $(file_name land $stamp) $post_dir/$(file_name fapar_mon $stamp) $(file_name land_mm $stamp)

#
# [output.jsbach.jsbach_mm]
#

$(file_name jsbach_mm $stamp): $(file_name jsbach $stamp) $post_dir/$(file_name albedo_mon $stamp) $post_dir/$(file_name fapar_mon $stamp) ; cdo merge $(file_name jsbach $stamp) $post_dir/$(file_name albedo_mon $stamp) $post_dir/$(file_name fapar_mon $stamp) $(file_name jsbach_mm $stamp)

#
# Global targets
#

all: $(file_name veg_mm $stamp) $(file_name surf_mm $stamp) $(file_name yasso_mm $stamp) $(file_name nitro_mm $stamp) $(file_name jsbachday $stamp) $(file_name vegday $stamp) $(file_name landday $stamp) $(file_name land_mm $stamp) $(file_name jsbach_mm $stamp)

EOF
    remove_list="$remove_list $(file_name veg $stamp) $(file_name surf $stamp) $(file_name yasso $stamp) $(file_name nitro $stamp) $(file_name land $stamp) $(file_name jsbach $stamp)"
done # stamps

srf_files_to_remove="$srf_files_to_remove $remove_list"

print 'JSBACH post-processing finished'

#
# Concatenate monthly to yearly files
#

print 'JSBACH concatenation started'

echo 0 > $post_dir/status
remove_list=

for ((year=startyear; year<=endyear; ++year))
do
    for filetag in veg_mm surf_mm yasso_mm nitro_mm jsbachday vegday landday land_mm jsbach_mm
    do
        input=${prefix}_${filetag}_%s$fileext
        output=${prefix}_${filetag}_$year$fileext
        # The twelve monthly file names of this year, January to December
        inputs=$(get_file_names $input $(for month in $(seq -f %02.0f 1 12); do echo ${year}${month}; done))
        # If too many jobs run at the same time, wait
        while (( $(jobs -pr | wc -l) >= max_jobs )); do sleep $sleep_time; done
        (
            trap 'echo $? > $post_dir/status' ERR
            time_merge $output $inputs
        ) &
        remove_list="$remove_list $inputs"
    done
done
wait
[[ $(<$post_dir/status) -eq 0 ]]

srf_files_to_remove="$srf_files_to_remove $remove_list"

print 'JSBACH concatenation finished'

#
# Epilogue
#

# Clean up
# (kept for inspection when DEBUG_LEVEL is 3 or higher)
if ((DEBUG_LEVEL < 3))
then
    print 'removal of temporary and non-precious data files started'
    rm -r $post_dir
    ( cd $DATA_DIR/$atmmod && rm $atm_files_to_remove )
    ( cd $DATA_DIR/$srfmod && rm $srf_files_to_remove )
    print 'removal of temporary and non-precious data files finished'
else
    print 'Debug3: skipping clean-up of temporary files'
fi

# Update run dates and submit job scripts

cd ${SCRIPT_DIR}

# $Id$
print "starting mon job for $startdate-$enddate"
./esm-ssp585-ocn-alk_EXP1.create -f $startdate mon
sbatch esm-ssp585-ocn-alk_EXP1.mon.$nextdate

print "post-processing finished for $startdate-$enddate"