#!/bin/bash
#
# opensnoop - trace open() syscalls with file details.
#             Written using Linux ftrace.
#
# This traces open() syscalls, showing the file name and returned file
# descriptor number (or -1, for error).
#
# This implementation is designed to work on older kernel versions, and without
# kernel debuginfo. It works by dynamic tracing of the return value of getname()
# as a string, and associating it with the following open() syscall return.
# This approach is kernel version specific, and may not work on your version.
# It is a workaround, and proof of concept for ftrace, until more kernel tracing
# functionality is available.
#
# USAGE: ./opensnoop [-htx] [-d secs] [-p pid] [-L tid] [-n name] [filename]
#
# Run "opensnoop -h" for full usage.
#
# REQUIREMENTS: FTRACE and KPROBE CONFIG, syscalls:sys_exit_open tracepoint,
# getname() kernel function (you may already have these on recent kernels),
# and awk.
#
# From perf-tools: https://github.com/brendangregg/perf-tools
#
# See the opensnoop(8) man page (in perf-tools) for more info.
#
# COPYRIGHT: Copyright (c) 2014 Brendan Gregg.
#
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License
#  as published by the Free Software Foundation; either version 2
#  of the License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software Foundation,
#  Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
#  (http://www.gnu.org/copyleft/gpl.html)
#
# 20-Jul-2014	Brendan Gregg	Created this.

### default variables
# Paths used by the script.
tracing=/sys/kernel/debug/tracing	# ftrace control directory
flock=/var/tmp/.ftrace-lock		# lock file holding the PID of the user
wroteflock=0				# set to 1 once this run wrote $flock

# Each opt_* flag is 0 or 1; the variable next to it holds the option argument.
opt_duration=0
duration=
opt_name=0
name=
opt_pid=0
pid=
opt_tid=0
tid=
opt_time=0
opt_fail=0
opt_file=0
file=

# Text appended to the "Tracing open()s..." banner to describe the filters.
ftext=

# Handle these signals with a no-op so the script is not killed by them and
# execution carries on to the end tracing section.
trap ':' INT QUIT TERM PIPE HUP

function usage {
	# Print the help text on stderr and exit. The bare "exit" passes on the
	# status of the preceding cat, so this normally exits with status 0.
	# The here-document uses <<-, so the leading tabs below are stripped.
	cat <<-END >&2
	USAGE: opensnoop [-htx] [-d secs] [-p PID] [-L TID] [-n name] [filename]
	                 -d seconds      # trace duration, and use buffers
	                 -n name         # process name to match on open
	                 -p PID          # PID to match on open
	                 -L TID          # TID to match on open
	                 -t              # include time (seconds)
	                 -x              # only show failed opens
	                 -h              # this usage message
	                 filename        # match filename (partials, REs, ok)
	  eg,
	       opensnoop                 # watch open()s live (unbuffered)
	       opensnoop -d 1            # trace 1 sec (buffered)
	       opensnoop -p 181          # trace I/O issued by PID 181 only
	       opensnoop conf            # trace filenames containing "conf"
	       opensnoop 'log$'          # filenames ending in "log"

	See the man page and example file for more info.
END
	exit
}

function warn {
	# Run a command line through eval and tolerate failure: when the command
	# returns non-zero, a warning naming it is written to stderr and execution
	# continues.
	# Arguments: the command line (eval joins multiple arguments with spaces).
	if ! eval "$@"; then
		# $* (not $@) so the arguments are joined into the one message string
		echo >&2 "WARNING: command failed \"$*\""
	fi
}

function end {
	# Undo the tracing setup: disable both events, reset their filters when
	# -p or -L installed one, remove the getnameprobe kprobe, empty the trace
	# buffer, and delete the lock file if this run created it.
	# Globals read: tracing, opt_pid, opt_tid, wroteflock, flock.
	# Each step goes through warn, so a failing step is reported and the
	# remaining steps still run.

	# disable tracing
	echo 2>/dev/null
	echo "Ending tracing..." 2>/dev/null
	# The writes below use paths relative to the tracing directory; skip them
	# if it cannot be entered, so that "echo > trace" and the kprobe_events
	# append can never land in some other working directory.
	if cd "$tracing"; then
		warn "echo 0 > events/kprobes/getnameprobe/enable"
		warn "echo 0 > events/syscalls/sys_exit_open/enable"
		if (( opt_pid || opt_tid )); then
			warn "echo 0 > events/kprobes/getnameprobe/filter"
			warn "echo 0 > events/syscalls/sys_exit_open/filter"
		fi
		warn "echo -:getnameprobe >> kprobe_events"
		warn "echo > trace"
	else
		echo >&2 "WARNING: unable to cd to $tracing, ftrace state not reset"
	fi
	# Release the lock even when the reset above was skipped.
	if (( wroteflock )); then
		rm -- "$flock" || echo >&2 "WARNING: command failed \"rm $flock\""
	fi
}

function die {
	# Fatal error: write the arguments as one message on stderr, then
	# terminate the script with exit status 1.
	echo "$@" 1>&2
	exit 1
}

function edie {
	# Fatal error that also needs cleanup: report the message on stderr,
	# silence all further output so that end() runs quietly, run end(),
	# then exit with status 1.
	echo "$@" 1>&2
	exec 1>/dev/null 2>&1
	end
	exit 1
}

### process options
# Each option sets its opt_* flag; options taking an argument also store it.
while getopts d:hn:p:L:tx opt; do
	case $opt in
	t)	opt_time=1 ;;
	x)	opt_fail=1 ;;
	d)	duration=$OPTARG; opt_duration=1 ;;
	n)	name=$OPTARG; opt_name=1 ;;
	p)	pid=$OPTARG; opt_pid=1 ;;
	L)	tid=$OPTARG; opt_tid=1 ;;
	# must stay last: the ? pattern matches any single character, which
	# includes the "?" that getopts reports for an invalid option
	h|?)	usage ;;
	esac
done
shift $(( OPTIND - 1 ))

# A single optional positional argument is the filename pattern.
if (( $# > 0 )); then
	file=$1
	opt_file=1
	shift
fi

# Anything still left over is a usage error.
if (( $# > 0 )); then
	usage
fi

### option logic
# -p, -n and -L are mutually exclusive.
if (( opt_pid + opt_name + opt_tid > 1 )); then
	die "ERROR: use at most one of -p, -n, -L."
fi

# Build the banner suffix describing the active filters. After the check
# above at most one of the three flags is set, so the order is irrelevant.
if (( opt_pid )); then
	ftext=" issued by PID $pid"
elif (( opt_tid )); then
	ftext=" issued by TID $tid"
elif (( opt_name )); then
	ftext=" issued by process name \"$name\""
fi
if (( opt_file )); then
	ftext+=" for filenames containing \"$file\""
fi

# Announce the mode: live until interrupted, or buffered for a fixed time.
if (( ! opt_duration )); then
	echo "Tracing open()s$ftext. Ctrl-C to end."
else
	echo "Tracing open()s$ftext for $duration seconds (buffered)..."
fi

### select awk
# Buffered (-d) runs prefer mawk and live runs prefer gawk, as a workaround
# for mawk fflush(). The preferred interpreter is looked up on PATH, which is
# also how $awk is invoked later; if it is not installed, plain awk is used.
if (( opt_duration )); then
	use=mawk
else
	use=gawk
fi
if command -v "$use" >/dev/null 2>&1; then
	awk=$use
else
	awk=awk
fi

### check permissions
# All later ftrace paths are relative, so the script works from inside the
# tracing directory. Failing to enter it is fatal.
cd "$tracing" || die "ERROR: accessing tracing. Root user? Kernel has FTRACE?
    debugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)"

### ftrace lock
# Create the lock file atomically. With noclobber set, the ">" redirection
# fails when the file already exists, so there is no window between checking
# for the lock and writing it. noclobber is confined to the subshell; $$ there
# still expands to the PID of this script.
if ! ( set -o noclobber; echo $$ > "$flock" ) 2>/dev/null; then
	if [[ -e $flock ]]; then
		die "ERROR: ftrace may be in use by PID $(cat "$flock") $flock"
	fi
	die "ERROR: unable to write $flock."
fi
wroteflock=1

### setup and begin tracing
echo nop > current_tracer
ver=$(uname -r)
# The kprobe fetches the getname() return value as a string. On 2.x and
# 3.0-3.6 kernels that return value is the "char *" itself; on later kernels
# it is a "struct filename *", which needs one more dereference to reach the
# name.
if [[ "$ver" == 2.* || "$ver" == 3.[0-6].* ]]; then
	# rval is char *
	kprobe='r:getnameprobe getname +0($retval):string'
else
	# rval is struct filename *
	kprobe='r:getnameprobe getname +0(+0($retval)):string'
fi
if ! echo "$kprobe" >> kprobe_events; then
	edie "ERROR: adding a kprobe for getname(). Exiting."
fi
if (( opt_pid )); then
	# Events carry a thread ID in common_pid, so matching a process means
	# matching every one of its threads:
	#     common_pid == T1 || common_pid == T2 ...
	filter=
	for task in /proc/"$pid"/task/*; do
		# an unmatched glob is left as the literal pattern: skip it
		[[ -e $task ]] || continue
		filter="${filter:+$filter || }common_pid == ${task##*/}"
	done
	if [[ -z $filter ]]; then
		edie "ERROR: no tasks found for -p $pid (is the PID running?). Exiting."
	fi
	if ! echo "$filter" > events/kprobes/getnameprobe/filter || \
	    ! echo "$filter" > events/syscalls/sys_exit_open/filter
	then
	    edie "ERROR: setting -p $pid. Exiting."
	fi
fi
if (( opt_tid )); then
	if ! echo "common_pid == $tid" > events/kprobes/getnameprobe/filter || \
	    ! echo "common_pid == $tid" > events/syscalls/sys_exit_open/filter
	then
	    edie "ERROR: setting -L $tid. Exiting."
	fi
fi
if ! echo 1 > events/kprobes/getnameprobe/enable; then
	edie "ERROR: enabling kprobe for getname(). Exiting."
fi
if ! echo 1 > events/syscalls/sys_exit_open/enable; then
	edie "ERROR: enabling open() exit tracepoint. Exiting."
fi
# column headings; the time column is only present with -t
(( opt_time )) && printf "%-16s " "TIMEs"
printf "%-16.16s %-6s %4s %s\n" "COMM" "PID" "FD" "FILE"

#
# Work out which trace output format is in use. The header line is one of
# (newest first):
#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
#           TASK-PID    CPU#    TIMESTAMP  FUNCTION
# The newer layout has one extra column. Counting the fields of the header
# line tells them apart: offset is 1 when the extra column is present and
# must be skipped, otherwise 0. Nothing is printed if no header is found.
#
offset=$($awk '
	# the first line whose second field mentions TASK is the header
	$2 ~ /TASK/ {
		o = ($1 == "#" && NF == 6) ? 1 : 0
		print o
		exit
	}' trace)

### print trace buffer
warn "echo > trace"
# The user-supplied patterns (-n name, filename) reach awk through the
# environment instead of -v: a -v assignment undergoes escape-sequence
# processing, which would rewrite a regular expression such as \.conf$ before
# it is used. The remaining -v values are quoted so that each stays a single
# argument whatever it contains.
( if (( opt_duration )); then
	# wait then dump buffer
	sleep "$duration"
	cat trace
else
	# print buffer live
	cat trace_pipe
fi ) | OPENSNOOP_NAME="$name" OPENSNOOP_FILE="$file" $awk -v o="$offset" \
    -v opt_name="$opt_name" -v opt_duration="$opt_duration" \
    -v opt_time="$opt_time" -v opt_fail="$opt_fail" -v opt_file="$opt_file" '
	BEGIN {
		# match patterns, taken verbatim from the environment
		name = ENVIRON["OPENSNOOP_NAME"]
		file = ENVIRON["OPENSNOOP_FILE"]
	}

	# common fields: process name and PID from the leading "comm-pid" column
	$1 != "#" {
		# NOTE(review): the line is split on "-", so comm is the text
		# before the FIRST dash. A task name that itself contains a dash
		# appears to be truncated there, with pid taken from the next
		# piece. Confirm whether such names need handling.
		split($0, line, "-")
		sub(/^[ \t\r\n]+/, "", line[1])
		comm = line[1]
		# -n: drop every event of a non-matching process name
		if (opt_name && match(comm, name) == 0)
			next
		sub(/ .*$/, "", line[2])
		pid = line[2]
	}

	# do_sys_open(): getname() returned to do_sys_open; remember the name
	$1 != "#" && $(5+o) ~ /do_sys_open/ {
		#
		# eg: ... (do_sys_open+0xc3/0x220 <- getname) arg1="file1"
		#
		filename = $NF
		# strip the closing quote, then everything up to the opening one
		sub(/"$/, "", filename)
		sub(/.*"/, "", filename)
		lastfile[pid] = filename
	}

	# sys_open(): syscall exit; pair it with the name saved for this PID
	$1 != "#" && $(4+o) == "sys_open" {
		filename = lastfile[pid]
		delete lastfile[pid]
		if (opt_file && filename !~ file)
			next
		rval = $NF
		# matched failed as beginning with 0xfffff
		if (opt_fail && rval !~ /0xfffff/)
			next
		if (rval ~ /0xfffff/)
			rval = -1

		if (opt_time) {
			time = $(3+o); sub(":", "", time)
			printf "%-16s ", time
		}
		printf "%-16.16s %-6s %4s %s\n", comm, pid, rval, filename
	}

	# pass dropped-event notices from the trace output on to stderr
	$0 ~ /LOST.*EVENTS/ { print "WARNING: " $0 > "/dev/stderr" }
'

### end tracing
# Reached once the pipeline above has finished. The signals listed in the trap
# at the top of the script are handled with a no-op, so the script is not
# killed by them and this cleanup still runs.
end
