$ mkdir -p ~/cs370/examples/subshells_parallel_proc
$ cd ~/cs370/examples/subshells_parallel_proc
#!/bin/bash
# subshell-test.sh
#
# Demonstrates that commands inside parentheses run in a subshell.
# While the loop below is running, the process list shows a second shell
# whose parent is this script.

(
  # Inside parentheses, and therefore a subshell . . .
  while true # Endless loop.
  do
    echo "Subshell running . . ."
  done
)

# Script will run forever,
# or at least until terminated by a Ctl-C.

exit $? # End of script (but will never get here).
./subshell-test.sh
ps aux | grep subshell-test.sh
UID PID PPID C STIME TTY TIME CMD
500 2698 2502 0 14:26 pts/4 00:00:00 sh subshell-test.sh
500 2699 2698 21 14:26 pts/4 00:00:24 sh subshell-test.sh
^^^^
( command1; command2; command3; ... )
ls -al | ( command1; command2; ... ).
# Run group of echo and cat commands through a subshell if grep fails.
# grep reads its patterns from id_rsa.pub (-f) and is silent (-q); -F makes
# it treat the key text as a fixed string rather than a regular expression.
# The subshell after || runs only when no matching line is found.
grep -qFf id_rsa.pub authorized_keys || (
  echo
  echo "SSH public key (id_rsa.pub) contents not found in ~/.ssh/authorized_keys."
  echo "Appended id_rsa.pub to authorized_keys."
  echo
  cat id_rsa.pub >> authorized_keys
)
# temporarily unset $http_proxy variable before a wget
$ (unset http_proxy; wget ...)
# Illustration of a subshell as a scratch environment: the settings changed
# inside the parentheses apply only to the commands inside them.
# COMMAND1..COMMAND7 are placeholders for real commands.
COMMAND1
COMMAND2
COMMAND3
# Start of subshell
(
IFS=:           # Change the field separator (inside the subshell only).
PATH=/bin       # Change the command search path (inside the subshell only).
unset TERMINFO  # Remove a variable (inside the subshell only).
set -C          # Turn on noclobber: ">" will not overwrite existing files.
shift 5         # Drop the first five positional parameters.
COMMAND4
COMMAND5
exit 3 # Only exits the subshell!
)
# The parent shell has not been affected, and the environment is preserved.
COMMAND6
COMMAND7
# Merge, sort and de-duplicate each group of three lists.
# Each pipeline runs in its own background subshell, so both groups are
# processed at the same time.
(
  cat list1 list2 list3 | sort | uniq > list123
) &
(
  cat list4 list5 list6 | sort | uniq > list456
) &

# The subshells are optional here; plain background pipelines work too:
#   cat list1 list2 list3 | sort | uniq > list123 &
#   cat list4 list5 list6 | sort | uniq > list456 &

# Hold here until both background jobs have finished.
wait

# Compare the two merged results.
diff list123 list456
Download and extract the parallel_shell_proc.tar.xz archive to try the following parallel processing examples. You can also download the archive with the command
wget https://plato.monmouth.edu/~jchung/download/parallel_shell_proc.tar.xz
Use log or state files that are generated by parallel processes:
#!/bin/sh
# run_scripts.sh
# Spawn off two sub-scripts to perform some function. This script
# will wait until they both signal their job is complete by creating
# an empty state file named sub_script1.done and sub_script2.done

# First, make sure the ".done" files don't exist, in case there was an
# abrupt end and we didn't get a chance to clean up the files:
sub_script1_file="sub_script1.done"
sub_script2_file="sub_script2.done"
rm -f "${sub_script1_file}" "${sub_script2_file}"

# Launch the two scripts in the background w/ &. Each one receives the
# name of the state file it must create when it has finished:
./sub_script1.sh "${sub_script1_file}" &
./sub_script2.sh "${sub_script2_file}" &

# Every 10 seconds, check whether both subscript state files have been
# created. Keep waiting while EITHER file is still missing; testing with
# "and" here would end the loop as soon as the first sub-script finished.
while [ ! -e "${sub_script1_file}" ] || [ ! -e "${sub_script2_file}" ]; do
  sleep 10
done

# At this point, both sub-scripts are done, so clean up the .done
# files:
rm -f "${sub_script1_file}" "${sub_script2_file}"
sub_script1.sh (in the above example):
#!/bin/sh
# sub_script1.sh
# This script finds and writes a list of all the files in the /usr
# directory and all subdirectories.
#
# Arguments: $1 - path of the empty state file to create when finished

echo "Running sub_script1.sh..."

# Error messages from find are discarded:
find /usr -type f > sub_script1.data 2> /dev/null

# The above find command is finished. Now create an empty state file
# to signal we're done. We were given the path and the name of the
# file to create as a command-line argument ($1). It is quoted so a
# path containing spaces still names a single file:
touch "$1"

echo "sub_script1.sh done"
sub_script2.sh (in the above example):
#!/bin/sh
# sub_script2.sh
# This script grabs the slashdot.org homepage and saves it to
# sub_script2.data.
#
# Arguments: $1 - path of the empty state file to create when finished

echo "Running sub_script2.sh..."

wget --quiet http://slashdot.org -O sub_script2.data

# We're done, create the "done" state file that was given as a
# command-line argument ($1). It is quoted so a path containing spaces
# still names a single file:
touch "$1"

echo "sub_script2.sh done"
However, the above example could have been greatly simplified just by using wait in bash.
#!/bin/bash
# run_scripts_wait.sh
# Spawn off two sub-scripts to perform some function.
# This script will wait until they both signal their job is complete

# Launch the scripts to create state files 1 & 2, though state files
# aren't actually needed for this version of the script:
./sub_script1.sh 1 &
./sub_script2.sh 2 &

# wait for both scripts to complete:
wait

# Clean up state files 1 & 2. -f keeps rm quiet if a sub-script failed
# before it could create its file:
rm -f 1 2
parallel-wordcount subdirectory.
wordcount script developed in previous lab activity
large1.txt, followed by large2.txt
#!/bin/bash
# sequentialwc: Simple sequential processing of large1.txt,
# followed by large2.txt
cat large1.txt | ./wordcount > large1.wc
cat large2.txt | ./wordcount > large2.wc
Run time (localhost):
$ time ./sequentialwc
real 0m8.473s
large1.txt and large2.txt
#!/bin/bash
# parallelwc: Simple local background parallel processing of large1.txt,
# and large2.txt, with a wait statement.
cat large1.txt | ./wordcount > large1.wc &
cat large2.txt | ./wordcount > large2.wc &
wait
Run time (localhost):
$ time ./parallelwc
real 0m6.800s
distribwc script to execute wordcount script on localhost plus a remotehost with two large input files
large1.txt
large1.txt is processed on the remotehost (Linux machine), via ssh.
“cat large1.txt | ssh $remotehost …” pipeline is put in the background, like a local process.
large1.txt are piped from localhost to remotehost's instance of the wordcount script via ssh.
wordcount instance is redirected back to large1.wc on the localhost.
#!/bin/bash
# distribwc: script to execute ''wordcount'' script on localhost
# plus a remotehost with two large input files
remotehost=$1 # remotehost name, e.g. plato, rockhopper, csselin01, etc.
workdir=$(pwd) # work directory where the wordcount script is located on both
               # localhost and remotehost; assumes wordcount script and input
               # files are in the same directory
cat large1.txt | ssh $remotehost "$workdir/wordcount" > large1.wc &
cat large2.txt | $workdir/wordcount > large2.wc &
wait
Run time (localhost + csselin09):
$ time ./distribwc csselin09 # Ran on localhost, processed large1.txt on csselin09
real 0m4.580s