-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmultigud
executable file
·341 lines (307 loc) · 11.7 KB
/
multigud
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
#!/bin/bash
# This script takes an existing Gudrun input file, strips out the RAW files associated with a specified SAMPLE,
# and then sequentially rewrites the file in order to process a whole sequence of RAW files individually. In addition,
# these files can be merged together in groups.
# Set defaults
GUDRUN="/home/nimrod_mgr/Gudrun/Gudrun/bin/gudrun_dcs"  # Gudrun executable launched for every job
GUDRUNDAT="gudrun_dcs.dat"  # Source Gudrun input file (-g)
SAMPLE=""                   # Name of the SAMPLE block to rewrite (-s)
PREFIX="NIMROD000"          # Text placed before the run number in raw file names (-p)
SUFFIX=""                   # Text placed after the run number in raw file names (-x)
STARTRUN=0                  # First run number (-a); 0 means "not given"
ENDRUN=0                    # Last run number (-b); 0 means "not given"
NMERGE=1                    # Number of raw files put into each job (-m)
MERGESTEP=1                 # Run number increment between consecutive jobs (-t)
MAXPROC=10                  # Maximum number of simultaneous jobs (-n)
ITERATIONS=0                # Number of wavelength subtraction iterations (-i)
KEEPTEMP=0                  # 1 = keep the multigud.N.dir working directories (-k)
declare -A ITERS

# Usage text. The sample name and run range are taken from the -s/-a/-b
# switches (nothing is read positionally), so the syntax line shows them as such.
USAGE="Syntax: multigud -s <SAMPLE name to change> -a <First run no.> -b <Last run no.> [options...]"
USAGE=$USAGE"\n -g <Source GudRun input file> [default = \"$GUDRUNDAT\"]"
USAGE=$USAGE"\n -p <Source RAWfile prefix> [default = \"$PREFIX\"]"
USAGE=$USAGE"\n -x <Source RAWfile suffix> [default = \"$SUFFIX\"]"
USAGE=$USAGE"\n -s <Target SAMPLE name - must be exact>"
USAGE=$USAGE"\n -a <Starting run number>"
USAGE=$USAGE"\n -b <Ending run number>"
USAGE=$USAGE"\n -m <Number of files to run (merge) together> [default = $NMERGE]"
USAGE=$USAGE"\n -t <Run number step to use when merging> [default = value of NMERGE]"
USAGE=$USAGE"\n -i <Number of wavelength subtraction iterations> [default = 0]"
USAGE=$USAGE"\n -n <Number of processes to run simultaneously> [default = $MAXPROC]"
USAGE=$USAGE"\n -h Displays long help"
USAGE=$USAGE"\n -k Keep temporary working directories"

# With no arguments at all, just show the usage and stop.
if (( $# == 0 ))
then
  # Quoted so that the bracketed parts of the text are never treated as glob patterns
  echo -e "$USAGE"
  exit 1
fi
# Parse command-line options...
# parseoptions <args...>
#   Reads the switches in its arguments and stores their values in the global
#   settings (PREFIX, GUDRUNDAT, SUFFIX, NMERGE, MERGESTEP, MAXPROC, SAMPLE,
#   STARTRUN, ENDRUN, ITERATIONS, KEEPTEMP), echoing each value as it is set.
#   Exits with status 1 after '-h', on an unknown switch, on a switch whose
#   argument is missing, or when non-switch operands are present.
function parseoptions
{
  # getopts state is kept local so the function can be called more than once.
  # STEPGIVEN records an explicit '-t' so that a later '-m' does not overwrite it.
  local OPTIND=1 OPTARG options STEPGIVEN=0
  # The leading ':' selects getopts' silent mode: unknown switches arrive as '?'
  # and missing arguments as ':', with the offending switch letter in OPTARG.
  while getopts ":x:a:b:s:n:p:g:m:t:i:kh" options
  do
    case $options in
      p ) PREFIX=$OPTARG
          echo "Datafile prefix set to : $PREFIX";;
      g ) GUDRUNDAT=$OPTARG
          echo "Source Gudrun datafile set to : $GUDRUNDAT";;
      x ) SUFFIX=$OPTARG
          echo "Datafile suffix set to : $SUFFIX";;
      m ) NMERGE=$OPTARG
          # The merge step follows NMERGE unless the user chose one with '-t'
          if (( STEPGIVEN == 0 ))
          then
            MERGESTEP=$NMERGE
          fi
          echo "Number of runs to merge together each time is $NMERGE (merge step = $MERGESTEP)";;
      t ) MERGESTEP=$OPTARG
          STEPGIVEN=1
          echo "Stepsize to be used in merge is $MERGESTEP";;
      n ) MAXPROC=$OPTARG
          echo "Number of simultaneous processes to run is $MAXPROC";;
      s ) SAMPLE=$OPTARG
          echo "SAMPLE name set to \"$SAMPLE\"";;
      a ) STARTRUN=$OPTARG
          echo "Start run number set to $STARTRUN";;
      b ) ENDRUN=$OPTARG
          echo "End run number set to $ENDRUN";;
      i ) ITERATIONS=$OPTARG
          echo "Wavelength subtraction iterations set to $ITERATIONS";;
      k ) KEEPTEMP=1
          echo "Temporary working directories ('multigud.N.dir') will not be deleted";;
      h ) echo ""
          echo "multigud takes an existing Gudrun input file, and strips the current list of raw datafiles from a "
          echo "specified SAMPLE in the file. The user provides the details of a series of new raw files to insert"
          echo "into this datafile, in the form (PREFIX)(RUNNO)[SUFFIX].raw. Normal raw files (called, for example,"
          echo "NIMROD00017165.raw) have 'NIMROD000' as the prefix, and no suffix, while renamed files may possess"
          echo "a suffix (e.g. 'NIMROD00017165_SampleB.raw', where '_SampleB' is then the suffix)."
          echo ""
          echo "Additionally, multigud allows multiple runs to be performed at once - i.e. parallel execution of"
          echo "Gudrun, but without the need for and parallel framework (e.g. MPI). Simply specify the number of"
          echo "concurrent processes with th '-n' switch. Instead of running each raw file in the specified range"
          echo "individually, runs may also be processed in batches of some size (set with '-m')."
          echo ""
          echo "NOTE: All other SAMPLEs in the target Gudrun input file should be turned off (set 'Analyse this"
          echo "sample' to 0) otherwise every process will needlessly run exactly the same data many times."
          echo ""
          echo "Examples:"
          echo " "
          echo " To run raw files SLS00012345 to SLS00012350 individually, changing the SAMPLE called 'Deuteriated'"
          echo " in the Gudrun input file 'samples.txt':"
          echo " bob@pc~> multigud -g samples.txt -s \"Deuteriated\" -p \"SLS000\" -a 12345 -b 12350"
          echo " "
          echo " To run all six of these files at once:"
          echo " bob@pc~> multigud -g samples.txt -s \"Deuteriated\" -p \"SLS000\" -a 12345 -b 12350 -n 6"
          echo " "
          echo " To run the raw files NIMROD00010000 to NIMROD00010055 in groups of five, changing the SAMPLE"
          echo " called 'Calcined at 500deg' in the Gudrun input file 'rb12345.txt':"
          echo " bob@pc~> multigud -g rb12345.txt -s \"Calcined at 500deg\" -p \"NIMROD000\" -a 10000 -b 10055 -m 5"
          echo " "
          echo " For the same example where the files have been renamed to NIMROD00010000_Calcined_at_500degC.raw,"
          echo " NIMROD00010001_Calcined_at_500degC.raw etc., use the following:"
          echo " bob@pc~> multigud -g rb12345.txt -s \"Calcined at 500deg\" -p \"NIMROD000\" -a 10000 -b 10055 -m 5 -x \"_Calcined_at_500degC\""
          echo " "
          exit 1;;
      \? ) echo "Error: Unrecognised switch '-$OPTARG'"
           exit 1;;
      : ) echo "Error: Switch '-$OPTARG' requires an argument."
          echo -e "$USAGE"
          exit 1;;
    esac
  done
  # getopts stops at the first argument that is not a switch, so anything left
  # over here (and every switch after it) would otherwise be silently ignored.
  if (( OPTIND <= $# ))
  then
    echo "Error: Extra operands given."
    echo -e "$USAGE"
    exit 1
  fi
}
parseoptions "$@"
# Print the names of the SAMPLE blocks found in the Gudrun input file
function listsamples
{
  echo "Available SAMPLES are:"
  sed -n "/^SAMPLE /s/SAMPLE \(.*\){/\1/p" "$GUDRUNDAT"
}
# Check SAMPLE provided
if [ "$SAMPLE" == "" ]
then
  echo "No target SAMPLE block specified."
  if [ -e "$GUDRUNDAT" ]
  then
    listsamples
  else
    echo "Specified Gudrun input file '$GUDRUNDAT' doesn't exist, so can't show list of contained samples."
  fi
  exit 1
fi
# Check starting run number
if (( STARTRUN == 0 ))
then
  echo "Error: Starting run number must be given with '-a'."
  exit 1
fi
# Check ending run number
if (( ENDRUN == 0 ))
then
  echo "Error: Ending run number must be given with '-b'."
  exit 1
fi
# Other variables
NRUNNING=0     # Number of jobs currently believed to be running
CHILDPIDS=""   # Space-separated PIDs of the launched jobs
# Need to protect any '/' characters in the SAMPLE name
# (the name is quoted on its way into sed so that its spacing is kept intact)
SAMPLE=$(printf '%s\n' "$SAMPLE" | sed "s:/:[/]:g")
echo "$SAMPLE"
# Does the specified Gudrun file exist?
if [ ! -e "$GUDRUNDAT" ]
then
  echo "Specified Gudrun input file '$GUDRUNDAT' doesn't exist!"
  exit 1
fi
# Check that the specified sample exists in the input file
if ! grep -q "SAMPLE $SAMPLE\s\+{" "$GUDRUNDAT"
then
  echo "SAMPLE block named '$SAMPLE' not found in $GUDRUNDAT."
  listsamples
  # This is a failure, so report it as one to the caller
  exit 1
fi
# Setup trap for SIGINT and SIGTERM so we can cleanup our processes
# (SIGKILL cannot be trapped, so it is not listed.)
# interrupt: kills every job listed in CHILDPIDS and exits with status 1.
# Each PID in CHILDPIDS is the shell that drives one job; the programs that
# shell has started are its children, and killing the shell alone would leave
# them running. Their PIDs are therefore collected (when pgrep is available)
# before the shell is killed, and signalled together with it.
function interrupt
{
  local pid kids
  echo "Signal Caught"
  echo "Current running processes are : "$CHILDPIDS
  for pid in $CHILDPIDS
  do
    echo "Killing $pid..."
    kids=""
    if command -v pgrep > /dev/null 2>&1
    then
      # Must be looked up first: once the parent is gone the link to it is lost
      kids=$(pgrep -P "$pid")
    fi
    # Left unquoted on purpose - 'kids' may hold several PIDs, or none
    kill -9 $pid $kids
  done
  exit 1
}
trap "interrupt" SIGINT SIGTERM
# Function to update running process list/counter
# Checks every PID in CHILDPIDS, reports the ones that no longer exist, and
# then rewrites CHILDPIDS with the survivors and sets NRUNNING to their count.
function updateprocs
{
  local pid alive="" count=0
  for pid in $CHILDPIDS
  do
    # 'kill -0' sends no signal; it only tests whether the process can be signalled
    if kill -0 "$pid" 2> /dev/null
    then
      alive="$pid $alive"
      count=$(( count + 1 ))
    else
      echo "Job $pid appears to have finished - removing from list..."
    fi
  done
  CHILDPIDS=$alive
  NRUNNING=$count
}
# Function to run job
# Starts, in the background, the Gudrun job for the input file multigud.$I
# inside its own working directory multigud.$I.dir. Reads the globals I,
# BASENAME, GUDRUN, ITERATIONS and KEEPTEMP. The job is a child bash fed by a
# here-document; it is the last background command started here, so the caller
# can pick up its PID from $! straight after the call.
# When the job ends it copies every $BASENAME.* file up to the starting
# directory and, unless KEEPTEMP is non-zero, deletes the working directory.
#
# NOTE: the here-documents are unquoted, so every unescaped $ in them is
# expanded by THIS shell before the child starts; only the escaped \$ITER is
# left for the child to expand. KEEPTEMP is not exported, so it is expanded
# here too - the child would otherwise see it as unset (i.e. zero).
function runjob
{
  local WORKDIR="multigud.${I}.dir"
  local WRANGE QRANGE TOPHAT
  echo "Running job file multigud.$I..."
  # Make temporary directory...
  if [ -e "$WORKDIR" ]
  then
    rm -rf "$WORKDIR"
  fi
  mkdir "$WORKDIR"
  # Change working directory name in input file and move it to the working dir
  # (the name appended to the path is that of the directory created just above)
  sed "s/\(\S*\)\(\W*Gudrun input file directory\)/\1\/${WORKDIR}\2/g" multigud.$I > "$WORKDIR/multigud.${I}"
  # If we are not iterating, just run Gudrun as per normal
  if (( ITERATIONS == 0 ))
  then
    bash << EOF &
cd $WORKDIR || exit 1
$GUDRUN multigud.$I > multigud.$I.out
cp $BASENAME.* ../
cd ../
if (( ${KEEPTEMP:-0} == 0 )); then rm -rf $WORKDIR; fi
EOF
  else
    # Before we start, grab the existing Q and wavelength ranges from the input file, along with the Top-Hat width
    WRANGE=`grep "Wavelength range to use" multigud.$I | sed "s/\([0-9.]* *[0-9.]*\).*/\1/g"`
    echo "Wavelength range (from input file): $WRANGE"
    QRANGE=`grep "Min, Max and step in x-scale" multigud.$I | sed "s/\([0-9.]* *[0-9.]* *[0-9.]*\).*/\1/g"`
    echo "Q range and step (from input file): $QRANGE"
    TOPHAT=`grep "Top hat width" multigud.$I | sed "s/-*\([0-9.]*\).*/\1/g"`
    echo "Top Hat width (from input file): $TOPHAT"
    bash << EOF &
cd $WORKDIR || exit 1
# Loop over number of requested iterations
for ITER in \`seq 1 $ITERATIONS\`
do
# Set number of iterations in file, and set wavelength subtraction
sed -i "s/[0-9]*\(\W*Number of iterations\)/$ITERATIONS\1/g" multigud.$I
# Wavelength binning step
echo "($I) Wavelength iteration \$ITER of $ITERATIONS..."
# -- Change to wavelength binning
sed -i "s/\S*\W*\S*\W*\S*\(\W*Min, Max and step in x-scale\)/$WRANGE -0.01\1/g" multigud.$I
sed -i "s/[0-9]\(\W*Scale selection\)/3\1/g" multigud.$I
# -- Change Top Hat width to zero
sed -i "s/[0-9\.]*\(\W*Top hat width\)/0\1/g" multigud.$I
# -- Change self-scattering data filename
sed -i "s/.*\(Name of file containing self scattering\)/$BASENAME.mint01 \1/g" multigud.$I
if (( ITER == 1 ))
then
sed -i "s/[0-9]\(\W*Subtract wavelength-binned\)/0\1/g" multigud.$I
else
sed -i "s/[0-9]\(\W*Subtract wavelength-binned\)/1\1/g" multigud.$I
fi
cp multigud.$I multigud.$I.w\$ITER
$GUDRUN multigud.$I > multigud.$I.out.w\$ITER
# Q binning step
echo "($I) Q iteration \$ITER of $ITERATIONS..."
# -- Change to Q binning
sed -i "s/\S*\W*\S*\W*\S*\(\W*Min, Max and step in x-scale\)/$QRANGE\1/g" multigud.$I
sed -i "s/[0-9]\(\W*Scale selection\)/1\1/g" multigud.$I
# -- Reset Top Hat width
sed -i "s/[0-9\.]*\(\W*Top hat width\)/$TOPHAT\1/g" multigud.$I
# -- Change self-scattering data filename
sed -i "s/\S*\(\W*Name of file containing self scattering\)/$BASENAME.msubw01 \1/g" multigud.$I
# -- Enable wavelength subtraction
sed -i "s/[0-9]\(\W*Subtract wavelength-binned\)/1\1/g" multigud.$I
cp multigud.$I multigud.$I.q\$ITER
$GUDRUN multigud.$I > multigud.$I.out.q\$ITER
done
# Cleanup
cp $BASENAME.* ../
cd ../
if (( ${KEEPTEMP:-0} == 0 )); then rm -rf $WORKDIR; fi
EOF
  fi
}
# Main Loop
# checkpositive <switch> <value>
#   Stops the script unless <value> is a whole number of at least 1. A merge
#   size, merge step or process limit below 1 would make the loop below either
#   spin without starting anything or start the same job over and over.
function checkpositive
{
  if ! [[ $2 =~ ^[0-9]+$ ]] || (( 10#$2 < 1 ))
  then
    echo "Error: Value given with '$1' must be a positive integer (got '$2')."
    exit 1
  fi
}
# waitforjob
#   Blocks until a background job ends. 'wait -n' (one job) needs bash 4.3;
#   older shells fall back to waiting for all of them.
function waitforjob
{
  if (( BASH_VERSINFO[0] > 4 || ( BASH_VERSINFO[0] == 4 && BASH_VERSINFO[1] >= 3 ) ))
  then
    wait -n
  else
    wait
  fi
}
checkpositive "-m" "$NMERGE"
checkpositive "-t" "$MERGESTEP"
checkpositive "-n" "$MAXPROC"
# Force base 10 so that leading zeros are not read as octal in the arithmetic below
NMERGE=$(( 10#$NMERGE ))
MERGESTEP=$(( 10#$MERGESTEP ))
MAXPROC=$(( 10#$MAXPROC ))
echo "Run number range is $STARTRUN to $ENDRUN, to be run in groups of $NMERGE, with stepsize $MERGESTEP."
I=$STARTRUN
# A job is started only while a complete group of NMERGE runs still fits in the range
while (( I+NMERGE-1 <= ENDRUN ))
do
  # Every slot in use? Refresh the list and, if still full, wait for a job to end.
  if (( NRUNNING >= MAXPROC ))
  then
    updateprocs
    if (( NRUNNING >= MAXPROC ))
    then
      waitforjob
      updateprocs
    fi
    continue
  fi
  # Take existing input file and do the following:
  # 1) Construct a list of new filenames to go into the file
  FILES=""
  NEWLINE=""
  BASENAME=""
  for (( M = 1; M <= NMERGE; M++ ))
  do
    J=$(( I+M-1 ))
    FILES=$FILES"${NEWLINE}${PREFIX}${J}${SUFFIX}.raw"
    NEWLINE="\n"
    # Set basename (first rawfile name in list)
    if (( M == 1 ))
    then
      BASENAME="${PREFIX}${J}${SUFFIX}"
    fi
  done
  # 2) Sed out the existing datafiles in the specified SAMPLE block, and add in new ones...
  sed -e "/^SAMPLE ${SAMPLE}/,/Force calculation/{/raw/d;/period number/s/\([0-9]\+\)\s\+\(.*files and period number\)/${NMERGE} \2\n${FILES}/}" "$GUDRUNDAT" > multigud.$I
  # 3) Call the runjob function, and add the PID of the last executed command to our list
  runjob
  CHILDPIDS="$! $CHILDPIDS"
  # Now the job is running, increment our counters
  I=$(( I + MERGESTEP ))
  NRUNNING=$(( NRUNNING + 1 ))
  echo "Current process list ($NRUNNING): $CHILDPIDS"
done
# Wait for remaining processes to finish
echo "Main loop has finished. Waiting for final $NRUNNING jobs to finish..."
updateprocs
while (( NRUNNING > 0 ))
do
  wait
  updateprocs
  echo "...jobs remaining = $NRUNNING"
done
exit 0