2. Top
Gets you a list of processes that are consuming the CPU
htop
Near-real-time list of running processes ordered by CPU usage; supports
scrolling and the mouse
vmstat
Provides information about processes, memory, paging, I/O,
traps and CPU
w/who/finger
Provides information about users that are consuming
resources on the computer
ps (ps -ef)
Lists all the currently running processes on a Linux computer
3. pgrep/pkill
pgrep <process name> lists the PID of the process based on
name
pkill <process name> sends a signal (default SIGTERM, a request to
terminate) to each matching process
free
Shows the current memory usage of the system. Shows
physical and swap memory
mpstat
mpstat 2 5 - shows five sets of global statistics across
all processors at two-second intervals.
mpstat -P ALL 2 5 - shows five sets of statistics for each processor
at two-second intervals.
4. iostat
Reports CPU statistics and input/output statistics for devices and partitions
(including NFS Samba partitions)
pmap
This command reports memory map of a process. This
can be used to find memory usage of the process.
5. Set the debug mode for this script (you will want it), and
remember what each debug mode switch does
1. # set -n : Uncomment to check script syntax, without execution.
2. # Note: Do not forget to put the comment back in, or
3. # the shell script will not execute!
4. # set -x : Uncomment to debug this shell script
# 6. Configuration for the process monitor.

# File name of this script; used to filter the monitor's own entry out of
# the `ps` listing.
PROC_MON=$(basename -- "$0")

# Log file that receives one timestamped line per state change.
LOGFILE="/home/ganesh/procmon.log"

# -s is false when the file is missing OR empty; in both cases touch it so
# that later appends have a file to write to.
[[ ! -s "$LOGFILE" ]] && touch "$LOGFILE"

TTY=$(tty)        # Current tty or pty
PROCESS="ssh"     # Which process to monitor
SLEEP_TIME="1"    # Sleep time in seconds between monitoring passes

# Terminal colour sequences for console messages. tput's diagnostics are
# discarded so that running without a usable terminal just leaves the
# variables empty (plain, uncoloured output).
txtred=$(tput setaf 1 2>/dev/null)   # Red: a failed process and its information
txtgrn=$(tput setaf 2 2>/dev/null)   # Green: successful process information
txtylw=$(tput setaf 3 2>/dev/null)   # Yellow: cautionary information
txtrst=$(tput sgr0 2>/dev/null)      # Resets text attributes
# 7. exit_trap: behaviour of the signal trap.
#
# Records that monitoring of $PROCESS has ended, both in the log file and on
# the console, then kills any background jobs this shell still owns.
#
# Globals read:    PROCESS, LOGFILE, txtred, txtrst
# Globals written: DATE, TIME
function exit_trap
{
    # Log an ending time for process monitoring
    DATE=$(date +%D)
    TIME=$(date +%T)   # Get a new timestamp...

    # Append in the foreground: a backgrounded append could itself be killed
    # by the job clean-up below before it has written anything.
    echo "$DATE @ $TIME: Monitoring for $PROCESS terminated" >> "$LOGFILE"
    echo "$DATE @ $TIME: ${txtred}Monitoring for $PROCESS terminated${txtrst}"

    # Kill all background jobs; skip the kill entirely when there are none,
    # since `kill` without a PID is only a usage error.
    local pids
    pids=$(jobs -p)
    if [[ -n "$pids" ]]; then
        # shellcheck disable=SC2086 -- one PID per word is intended
        kill -9 $pids 2>/dev/null
    fi
}
8. Set the trap to see if the process exits
# On HUP (1), INT (2), QUIT (3) or TERM (15): run exit_trap, then exit 0.
trap 'exit_trap; exit 0' 1 2 3 15

# 9. Initial check: see whether $PROCESS is running and start it if not.
# The pipeline keeps `ps` lines mentioning $PROCESS and drops the grep itself
# and this monitor script. Without `pipefail` its status is that of the last
# grep, i.e. non-zero when no matching line is left.
if ! ps aux | grep "$PROCESS" | grep -v "grep $PROCESS" \
    | grep -v "$PROC_MON" >/dev/null
then
    DATE=$(date +%D)
    TIME=$(date +%T)
    echo
    # Log entries are appended in the foreground so they stay in order.
    echo "$DATE @ $TIME: $PROCESS is NOT active...starting $PROCESS.." >> "$LOGFILE"
    echo "$DATE @ $TIME: ${txtylw}$PROCESS is NOT active...starting $PROCESS..${txtrst}"
    echo
    sleep 1
    service "$PROCESS" start &
    # NOTE(review): this is logged right after launching `service`; the start
    # itself is not verified here.
    echo "$DATE @ $TIME: $PROCESS has been started..." >> "$LOGFILE"
else
    # The process is already running.
    echo   # a blank line
    DATE=$(date +%D)
    TIME=$(date +%T)
    echo "$DATE @ $TIME: $PROCESS is currently RUNNING..." >> "$LOGFILE"
    echo "$DATE @ $TIME: ${txtgrn}$PROCESS is currently RUNNING...${txtrst}"
fi
# 10. Monitoring loop.
# RC is not assigned anywhere in the visible script, so (( RC == 0 )) stays
# true and the loop runs until the script exits or is signalled.
while (( RC == 0 ))
do
    # Same check as at start-up: non-zero status means no line for $PROCESS
    # is left after filtering out the grep and this script.
    if ! ps aux | grep "$PROCESS" | grep -v "grep $PROCESS" \
        | grep -v "$PROC_MON" >/dev/null 2>&1
    then
        echo
        DATE=$(date +%D)
        TIME=$(date +%T)
        echo "$DATE @ $TIME: $PROCESS has STOPPED..." >> "$LOGFILE"
        echo "$DATE @ $TIME: ${txtred}$PROCESS has STOPPED...${txtrst}"
        echo
        service "$PROCESS" start &
        echo "$DATE @ $TIME: $PROCESS has RESTARTED..." >> "$LOGFILE"
        echo "$DATE @ $TIME: ${txtgrn}$PROCESS has RESTARTED...${txtrst}"
        sleep 1

        # 11. Re-check after the restart attempt.
        if ! ps aux | grep "$PROCESS" | grep -v "grep $PROCESS" \
            | grep -v "$PROC_MON" >/dev/null 2>&1
        then
            echo
            DATE=$(date +%D)   # New time stamp
            TIME=$(date +%T)
            echo "$DATE @ $TIME: $PROCESS failed to restart..." >> "$LOGFILE"
            echo "$DATE @ $TIME: ${txtred}$PROCESS failed to restart...${txtrst}"
            # NOTE(review): exits with status 0 even though the restart
            # failed; kept as in the original -- confirm whether callers
            # expect a non-zero status here.
            exit 0
        fi
    fi
    sleep "$SLEEP_TIME"   # This is needed to reduce CPU load!!!
done
12. Process is hard coded in the script
# Process to be monitored: this name is matched against `ps aux` output and
# passed to `service ... start` by the monitoring code below.
target="ssh"
16. # Monitor process and restart if necessary
# NOTE(review): the closing brace at the end of this section has no matching
# function header in the visible text; presumably this is the body of
# process_monitoring (called from the main loop) -- confirm against the
# original script.
# Makes up to three passes: each pass checks for $target in the process list
# and, when it is missing, reports and logs the attempt, launches
# `service $target start` in the background and waits 2 seconds.
for attempt in 1 2 3
do
# NOTE(review): the second line of this pipeline starts with `|`, which bash
# rejects; it was presumably a single line (or ended with a trailing `|`)
# before the text was wrapped.
ps aux | grep "$target" | grep -v "grep $target"
| grep -v $script_name >/dev/null
if [ $? != 0 ]
then
log_time=$(date)
echo
echo "$(tput setaf 3)$target is not running. Attempt will be made to restart. This is attempt
$attempt of 3.$(tput sgr0)"
echo >>$log_file
echo "$log_time: $target is not running. Restarting. Attempt $attempt of 3.">>$log_file
echo
service $target start &
sleep 2 # Pause to prevent false positives from restart attempt.
else
# NOTE(review): assigning the loop variable does not end a `for ... in` loop;
# the next iteration simply reassigns it. `break` may have been intended --
# confirm.
attempt="3"
fi
done
sleep 2 # Pause to prevent false positives from restart attempt.
}
# 17. detect_failure: report whether $target is running after the restart
# attempts.
#
# When no process-list line for $target remains (ignoring the grep itself and
# this script), reports the failure on the console, logs it and sets the
# failure flag. Otherwise reports and logs that $target is running.
#
# Globals read:    target, script_name, log_file
# Globals written: log_time, script_failure (set to "1" on failure only)
detect_failure()
{
    # Without `pipefail` the pipeline's status is that of the last grep:
    # non-zero when nothing matching $target is left.
    if ! ps aux | grep "$target" | grep -v "grep $target" \
        | grep -v "$script_name" >/dev/null
    then
        log_time=$(date)
        echo
        # Report failure to user
        echo "$(tput setaf 1)$target is not running after 3 attempts. Process has failed and cannot be restarted. $(tput sgr0)"
        echo "This script will now close."
        echo "" >> "$log_file"
        echo "$log_time: $target cannot be restarted." >> "$log_file"   # Log failure
        script_failure="1"   # Set failure flag
    else
        log_time=$(date)
        echo
        echo "$log_time : $target is running."
        echo "$log_time : $target is running." >> "$log_file"
    fi
}
# 18. program_closing: report and log that the ProcMon script is shutting
# down.
#
# Prints a closing notice to the console and appends a termination entry
# followed by a separator line to the log file.
#
# Globals read:    target, log_file
# Globals written: log_time
program_closing()
{
    # Report and log script shutdown
    log_time=$(date)
    echo
    # Reports closing of ProcMon to user
    echo "Closing ProcMon script. No further monitoring of $target will be performed."
    echo
    echo "$(tput setaf 1)$log_time: Monitoring for $target terminated. $(tput sgr0)"
    echo
    # Logs closing of ProcMon to log_file
    echo "$log_time: Monitoring for $target terminated." >> "$log_file"
    echo >> "$log_file"
    echo "***************" >> "$log_file"
    echo >> "$log_file"
    # The original ended with `kill -9 > /dev/null`, which names no PID and
    # therefore only printed a usage error. Every visible caller runs
    # `exit 0` right after this function, so no explicit kill is needed.
}
# 19. Trap shutdown attempts (HUP, INT, QUIT, TERM) to enable logging of
# shutdown.
trap 'program_closing; exit 0' 1 2 3 15

# Inform user of purpose of script
clear
echo
echo "This script will monitor $target to ensure that it is running,"
echo "and attempt to restart it if it is not. If it is unable to"
echo "restart after 3 attempts, it will report failure and close."
sleep 2

# Perform monitoring.
# The flag is quoted and defaulted: with a bare `[ $script_failure != "1" ]`
# an unset or empty flag makes the test itself fail, which silently skips the
# whole loop. NOTE(review): the initial assignment of script_failure and
# wait_time is not visible here -- confirm they are set earlier in the script.
while [[ "${script_failure:-0}" != "1" ]]
do
    process_monitoring   # Monitors process and attempts 3 restarts if it fails.
    detect_failure       # Reports failure in the event that the process does not restart.
    if [[ "${script_failure:-0}" != "1" ]]
    then
        sleep "$wait_time"
    fi
done

sleep 2
program_closing   # Logs script closure
exit 0