diff options
| -rw-r--r-- | tools/Makefile | 14 | ||||
| -rw-r--r-- | tools/tracing/Makefile | 19 | ||||
| -rw-r--r-- | tools/tracing/latency/.gitignore | 2 | ||||
| -rw-r--r-- | tools/tracing/latency/Makefile | 24 | ||||
| -rw-r--r-- | tools/tracing/latency/latency-collector.c | 2108 |
5 files changed, 2161 insertions, 6 deletions
diff --git a/tools/Makefile b/tools/Makefile index 85af6ebbce91..7e9d34ddd74c 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -31,6 +31,7 @@ help: @echo ' bootconfig - boot config tool' @echo ' spi - spi tools' @echo ' tmon - thermal monitoring and tuning tool' + @echo ' tracing - misc tracing tools' @echo ' turbostat - Intel CPU idle stats and freq reporting tool' @echo ' usb - USB testing tools' @echo ' virtio - vhost test module' @@ -64,7 +65,7 @@ acpi: FORCE cpupower: FORCE $(call descend,power/$@) -cgroup firewire hv guest bootconfig spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging: FORCE +cgroup firewire hv guest bootconfig spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE $(call descend,$@) bpf/%: FORCE @@ -103,7 +104,7 @@ all: acpi cgroup cpupower gpio hv firewire liblockdep \ perf selftests bootconfig spi turbostat usb \ virtio vm bpf x86_energy_perf_policy \ tmon freefall iio objtool kvm_stat wmi \ - pci debugging + pci debugging tracing acpi_install: $(call descend,power/$(@:_install=),install) @@ -111,7 +112,7 @@ acpi_install: cpupower_install: $(call descend,power/$(@:_install=),install) -cgroup_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install: +cgroup_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install: $(call descend,$(@:_install=),install) liblockdep_install: @@ -137,7 +138,8 @@ install: acpi_install cgroup_install cpupower_install gpio_install \ perf_install selftests_install turbostat_install usb_install \ virtio_install vm_install bpf_install x86_energy_perf_policy_install \ tmon_install freefall_install objtool_install kvm_stat_install \ - wmi_install pci_install debugging_install intel-speed-select_install + wmi_install pci_install debugging_install intel-speed-select_install \ + tracing_install acpi_clean: $(call descend,power/acpi,clean) @@ -145,7 +147,7 @@ acpi_clean: cpupower_clean: $(call descend,power/cpupower,clean) -cgroup_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean: +cgroup_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean: $(call descend,$(@:_clean=),clean) liblockdep_clean: @@ -184,6 +186,6 @@ clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean \ vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \ gpio_clean objtool_clean leds_clean wmi_clean pci_clean firmware_clean debugging_clean \ - intel-speed-select_clean + intel-speed-select_clean tracing_clean .PHONY: FORCE diff --git a/tools/tracing/Makefile b/tools/tracing/Makefile new file mode 100644 index 000000000000..87e0ec48e2e7 --- /dev/null +++ b/tools/tracing/Makefile @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 +include ../scripts/Makefile.include + +all: latency + +clean: latency_clean + +install: latency_install + +latency: + $(call descend,latency) + +latency_install: + $(call descend,latency,install) + +latency_clean: + $(call descend,latency,clean) + +.PHONY: all install clean latency latency_install latency_clean diff --git a/tools/tracing/latency/.gitignore b/tools/tracing/latency/.gitignore new file mode 100644 index 000000000000..0863960761e7 --- /dev/null +++ b/tools/tracing/latency/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +latency-collector diff --git a/tools/tracing/latency/Makefile b/tools/tracing/latency/Makefile new file mode 100644 index 000000000000..40c4ddaf8be1 --- /dev/null +++ b/tools/tracing/latency/Makefile @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for vm tools +# +VAR_CFLAGS := $(shell pkg-config --cflags libtracefs 2>/dev/null) +VAR_LDLIBS := $(shell pkg-config --libs libtracefs 2>/dev/null) + +TARGETS = latency-collector +CFLAGS = -Wall -Wextra -g -O2 $(VAR_CFLAGS) +LDFLAGS = -lpthread $(VAR_LDLIBS) + +all: $(TARGETS) + +%: %.c + $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) + +clean: + $(RM) latency-collector + +prefix ?= /usr/local +sbindir ?= ${prefix}/sbin + +install: all + install -d $(DESTDIR)$(sbindir) + install -m 755 -p $(TARGETS) $(DESTDIR)$(sbindir) diff --git a/tools/tracing/latency/latency-collector.c b/tools/tracing/latency/latency-collector.c new file mode 100644 index 000000000000..57b20802e71b --- /dev/null +++ b/tools/tracing/latency/latency-collector.c @@ -0,0 +1,2108 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2017, 2018, 2019, 2021 BMW Car IT GmbH + * Author: Viktor Rosendahl (viktor.rosendahl@bmw.de) + */ + +#define _GNU_SOURCE +#define _POSIX_C_SOURCE 200809L + +#include <ctype.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <sched.h> +#include <linux/unistd.h> +#include <signal.h> +#include <sys/inotify.h> +#include <unistd.h> +#include <pthread.h> +#include <tracefs.h> + +static const char *prg_name; +static const char *prg_unknown = "unknown program name"; + +static int fd_stdout; + +static int sched_policy; +static bool sched_policy_set; + +static int sched_pri; +static bool sched_pri_set; + +static bool trace_enable = true; +static bool setup_ftrace = true; +static bool use_random_sleep; + +#define TRACE_OPTS \ + C(FUNC_TR, "function-trace"), \ + C(DISP_GR, "display-graph"), \ + C(NR, NULL) + +#undef C +#define C(a, b) OPTIDX_##a + +enum traceopt { + TRACE_OPTS +}; + +#undef C +#define C(a, b) b + +static const char *const optstr[] = { + TRACE_OPTS +}; + +enum errhandling { + ERR_EXIT = 0, + ERR_WARN, + ERR_CLEANUP, +}; + +static bool use_options[OPTIDX_NR]; + +static char inotify_buffer[655360]; + +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#define bool2str(x) (x ? "true":"false") + +#define DEFAULT_NR_PRINTER_THREADS (3) +static unsigned int nr_threads = DEFAULT_NR_PRINTER_THREADS; + +#define DEFAULT_TABLE_SIZE (2) +static unsigned int table_startsize = DEFAULT_TABLE_SIZE; + +static int verbosity; + +#define verbose_sizechange() (verbosity >= 1) +#define verbose_lostevent() (verbosity >= 2) +#define verbose_ftrace() (verbosity >= 1) + +#define was_changed(ORIG, CUR) (strcmp(ORIG, CUR) != 0) +#define needs_change(CUR, WANTED) (strcmp(CUR, WANTED) != 0) + +static const char *debug_tracefile; +static const char *debug_tracefile_dflt; +static const char *debug_maxlat; +static const char *debug_maxlat_dflt; +static const char * const DEBUG_NOFILE = "[file not found]"; + +static const char * const TR_MAXLAT = "tracing_max_latency"; +static const char * const TR_THRESH = "tracing_thresh"; +static const char * const TR_CURRENT = "current_tracer"; +static const char * const TR_OPTIONS = "trace_options"; + +static const char * const NOP_TRACER = "nop"; + +static const char * const OPT_NO_PREFIX = "no"; + +#define DFLT_THRESHOLD_US "0" +static const char *threshold = DFLT_THRESHOLD_US; + +#define DEV_URANDOM "/dev/urandom" +#define RT_DEFAULT_PRI (99) +#define DEFAULT_PRI (0) + +#define USEC_PER_MSEC (1000L) +#define NSEC_PER_USEC (1000L) +#define NSEC_PER_MSEC (USEC_PER_MSEC * NSEC_PER_USEC) + +#define MSEC_PER_SEC (1000L) +#define USEC_PER_SEC (USEC_PER_MSEC * MSEC_PER_SEC) +#define NSEC_PER_SEC (NSEC_PER_MSEC * MSEC_PER_SEC) + +#define SLEEP_TIME_MS_DEFAULT (1000L) +#define TRY_PRINTMUTEX_MS (1000) + +static long sleep_time = (USEC_PER_MSEC * SLEEP_TIME_MS_DEFAULT); + +static const char * const queue_full_warning = +"Could not queue trace for printing. It is likely that events happen faster\n" +"than what they can be printed. Probably partly because of random sleeping\n"; + +static const char * const no_tracer_msg = +"Could not find any tracers! Running this program as root may help!\n"; + +static const char * const no_latency_tr_msg = +"No latency tracers are supported by your kernel!\n"; + +struct policy { + const char *name; + int policy; + int default_pri; +}; + +static const struct policy policies[] = { + { "other", SCHED_OTHER, DEFAULT_PRI }, + { "batch", SCHED_BATCH, DEFAULT_PRI }, + { "idle", SCHED_IDLE, DEFAULT_PRI }, + { "rr", SCHED_RR, RT_DEFAULT_PRI }, + { "fifo", SCHED_FIFO, RT_DEFAULT_PRI }, + { NULL, 0, DEFAULT_PRI } +}; + +/* + * The default tracer will be the first on this list that is supported by the + * currently running Linux kernel. + */ +static const char * const relevant_tracers[] = { + "preemptirqsoff", + "preemptoff", + "irqsoff", + "wakeup", + "wakeup_rt", + "wakeup_dl", + NULL +}; + +/* This is the list of tracers for which random sleep makes sense */ +static const char * const random_tracers[] = { + "preemptirqsoff", + "preemptoff", + "irqsoff", + NULL +}; + +static const char *current_tracer; +static bool force_tracer; + +struct ftrace_state { + char *tracer; + char *thresh; + bool opt[OPTIDX_NR]; + bool opt_valid[OPTIDX_NR]; + pthread_mutex_t mutex; +}; + +struct entry { + int ticket; + int ticket_completed_ref; +}; + +struct print_state { + int ticket_counter; + int ticket_completed; + pthread_mutex_t mutex; + pthread_cond_t cond; + int cnt; + pthread_mutex_t cnt_mutex; +}; + +struct short_msg { + char buf[160]; + int len; +}; + +static struct print_state printstate; +static struct ftrace_state save_state; +volatile sig_atomic_t signal_flag; + +#define PROB_TABLE_MAX_SIZE (1000) + +int probabilities[PROB_TABLE_MAX_SIZE]; + +struct sleep_table { + int *table; + int size; + pthread_mutex_t mutex; +}; + +static struct sleep_table sleeptable; + +#define QUEUE_SIZE (10) + +struct queue { + struct entry entries[QUEUE_SIZE]; + int next_prod_idx; + int next_cons_idx; + pthread_mutex_t mutex; + pthread_cond_t cond; +}; + +#define MAX_THREADS (40) + +struct queue printqueue; +pthread_t printthread[MAX_THREADS]; +pthread_mutex_t print_mtx; +#define PRINT_BUFFER_SIZE (16 * 1024 * 1024) + +static void cleanup_exit(int status); +static int set_trace_opt(const char *opt, bool value); + +static __always_inline void *malloc_or_die(size_t size) +{ + void *ptr = malloc(size); + + if (unlikely(ptr == NULL)) { + warn("malloc() failed"); + cleanup_exit(EXIT_FAILURE); + } + return ptr; +} + +static __always_inline void *malloc_or_die_nocleanup(size_t size) +{ + void *ptr = malloc(size); + + if (unlikely(ptr == NULL)) + err(0, "malloc() failed"); + return ptr; +} + +static __always_inline void write_or_die(int fd, const char *buf, size_t count) +{ + ssize_t r; + + do { + r = write(fd, buf, count); + if (unlikely(r < 0)) { + if (errno == EINTR) + continue; + warn("write() failed"); + cleanup_exit(EXIT_FAILURE); + } + count -= r; + buf += r; + } while (count > 0); +} + +static __always_inline void clock_gettime_or_die(clockid_t clk_id, + struct timespec *tp) +{ + int r = clock_gettime(clk_id, tp); + + if (unlikely(r != 0)) + err(EXIT_FAILURE, "clock_gettime() failed"); +} + +static __always_inline void sigemptyset_or_die(sigset_t *s) +{ + if (unlikely(sigemptyset(s) != 0)) { + warn("sigemptyset() failed"); + cleanup_exit(EXIT_FAILURE); + } +} + +static __always_inline void sigaddset_or_die(sigset_t *s, int signum) +{ + if (unlikely(sigaddset(s, signum) != 0)) { + warn("sigemptyset() failed"); + cleanup_exit(EXIT_FAILURE); + } +} + +static __always_inline void sigaction_or_die(int signum, + const struct sigaction *act, + struct sigaction *oldact) +{ + if (unlikely(sigaction(signum, act, oldact) != 0)) { + warn("sigaction() failed"); + cleanup_exit(EXIT_FAILURE); + } +} + +static void open_stdout(void) +{ + if (setvbuf(stdout, NULL, _IONBF, 0) != 0) + err(EXIT_FAILURE, "setvbuf() failed"); + fd_stdout = fileno(stdout); + if (fd_stdout < 0) + err(EXIT_FAILURE, "fileno() failed"); +} + +/* + * It's not worth it to call cleanup_exit() from mutex functions because + * cleanup_exit() uses mutexes. + */ +static __always_inline void mutex_lock(pthread_mutex_t *mtx) +{ + errno = pthread_mutex_lock(mtx); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_mutex_lock() failed"); +} + + +static __always_inline void mutex_unlock(pthread_mutex_t *mtx) +{ + errno = pthread_mutex_unlock(mtx); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_mutex_unlock() failed"); +} + +static __always_inline void cond_signal(pthread_cond_t *cond) +{ + errno = pthread_cond_signal(cond); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_cond_signal() failed"); +} + +static __always_inline void cond_wait(pthread_cond_t *restrict cond, + pthread_mutex_t *restrict mutex) +{ + errno = pthread_cond_wait(cond, mutex); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_cond_wait() failed"); +} + +static __always_inline void cond_broadcast(pthread_cond_t *cond) +{ + errno = pthread_cond_broadcast(cond); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_cond_broadcast() failed"); +} + +static __always_inline void +mutex_init(pthread_mutex_t *mutex, + const pthread_mutexattr_t *attr) +{ + errno = pthread_mutex_init(mutex, attr); + if (errno) + err(EXIT_FAILURE, "pthread_mutex_init() failed"); +} + +static __always_inline void mutexattr_init(pthread_mutexattr_t *attr) +{ + errno = pthread_mutexattr_init(attr); + if (errno) + err(EXIT_FAILURE, "pthread_mutexattr_init() failed"); +} + +static __always_inline void mutexattr_destroy(pthread_mutexattr_t *attr) +{ + errno = pthread_mutexattr_destroy(attr); + if (errno) + err(EXIT_FAILURE, "pthread_mutexattr_destroy() failed"); +} + +static __always_inline void mutexattr_settype(pthread_mutexattr_t *attr, + int type) +{ + errno = pthread_mutexattr_settype(attr, type); + if (errno) + err(EXIT_FAILURE, "pthread_mutexattr_settype() failed"); +} + +static __always_inline void condattr_init(pthread_condattr_t *attr) +{ + errno = pthread_condattr_init(attr); + if (errno) + err(EXIT_FAILURE, "pthread_condattr_init() failed"); +} + +static __always_inline void condattr_destroy(pthread_condattr_t *attr) +{ + errno = pthread_condattr_destroy(attr); + if (errno) + err(EXIT_FAILURE, "pthread_condattr_destroy() failed"); +} + +static __always_inline void condattr_setclock(pthread_condattr_t *attr, + clockid_t clock_id) +{ + errno = pthread_condattr_setclock(attr, clock_id); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_condattr_setclock() failed"); +} + +static __always_inline void cond_init(pthread_cond_t *cond, + const pthread_condattr_t *attr) +{ + errno = pthread_cond_init(cond, attr); + if (errno) + err(EXIT_FAILURE, "pthread_cond_init() failed"); +} + +static __always_inline int +cond_timedwait(pthread_cond_t *restrict cond, + pthread_mutex_t *restrict mutex, + const struct timespec *restrict abstime) +{ + errno = pthread_cond_timedwait(cond, mutex, abstime); + if (errno && errno != ETIMEDOUT) + err(EXIT_FAILURE, "pthread_cond_timedwait() failed"); + return errno; +} + +static void init_printstate(void) +{ + pthread_condattr_t cattr; + + printstate.ticket_counter = 0; + printstate.ticket_completed = 0; + printstate.cnt = 0; + + mutex_init(&printstate.mutex, NULL); + + condattr_init(&cattr); + condattr_setclock(&cattr, CLOCK_MONOTONIC); + cond_init(&printstate.cond, &cattr); + condattr_destroy(&cattr); +} + +static void init_print_mtx(void) +{ + pthread_mutexattr_t mattr; + + mutexattr_init(&mattr); + mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE); + mutex_init(&print_mtx, &mattr); + mutexattr_destroy(&mattr); + +} + +static void signal_blocking(int how) +{ + sigset_t s; + + sigemptyset_or_die(&s); + sigaddset_or_die(&s, SIGHUP); + sigaddset_or_die(&s, SIGTERM); + sigaddset_or_die(&s, SIGINT); + + errno = pthread_sigmask(how, &s, NULL); + if (unlikely(errno)) { + warn("pthread_sigmask() failed"); + cleanup_exit(EXIT_FAILURE); + } +} + +static void signal_handler(int num) +{ + signal_flag = num; +} + +static void setup_sig_handler(void) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = signal_handler; + + sigaction_or_die(SIGHUP, &sa, NULL); + sigaction_or_die(SIGTERM, &sa, NULL); + sigaction_or_die(SIGINT, &sa, NULL); +} + +static void process_signal(int signal) +{ + char *name; + + name = strsignal(signal); + if (name == NULL) + printf("Received signal %d\n", signal); + else + printf("Received signal %d (%s)\n", signal, name); + cleanup_exit(EXIT_SUCCESS); +} + +static __always_inline void check_signals(void) +{ + int signal = signal_flag; + + if (unlikely(signal)) + process_signal(signal); +} + +static __always_inline void get_time_in_future(struct timespec *future, + long time_us) +{ + long nsec; + + clock_gettime_or_die(CLOCK_MONOTONIC, future); + future->tv_sec += time_us / USEC_PER_SEC; + nsec = future->tv_nsec + (time_us * NSEC_PER_USEC) % NSEC_PER_SEC; + if (nsec >= NSEC_PER_SEC) { + future->tv_nsec = nsec % NSEC_PER_SEC; + future->tv_sec += 1; + } +} + +static __always_inline bool time_has_passed(const struct timespec *time) +{ + struct timespec now; + + clock_gettime_or_die(CLOCK_MONOTONIC, &now); + if (now.tv_sec > time->tv_sec) + return true; + if (now.tv_sec < time->tv_sec) + return false; + return (now.tv_nsec >= time->tv_nsec); +} + +static bool mutex_trylock_limit(pthread_mutex_t *mutex, int time_ms) +{ + long time_us = time_ms * USEC_PER_MSEC; + struct timespec limit; + + get_time_in_future(&limit, time_us); + do { + errno = pthread_mutex_trylock(mutex); + if (errno && errno != EBUSY) + err(EXIT_FAILURE, "pthread_mutex_trylock() failed"); + } while (errno && !time_has_passed(&limit)); + return errno == 0; +} + +static void restore_trace_opts(const struct ftrace_state *state, + const bool *cur) +{ + int i; + int r; + + for (i = 0; i < OPTIDX_NR; i++) + if (state->opt_valid[i] && state->opt[i] != cur[i]) { + r = set_trace_opt(optstr[i], state->opt[i]); + if (r < 0) + warnx("Failed to restore the %s option to %s", + optstr[i], bool2str(state->opt[i])); + else if (verbose_ftrace()) + printf("Restored the %s option in %s to %s\n", + optstr[i], TR_OPTIONS, + bool2str(state->opt[i])); + } +} + +static char *read_file(const char *file, enum errhandling h) +{ + int psize; + char *r; + static const char *emsg = "Failed to read the %s file"; + + r = tracefs_instance_file_read(NULL, file, &psize); + if (!r) { + if (h) { + warn(emsg, file); + if (h == ERR_CLEANUP) + cleanup_exit(EXIT_FAILURE); + } else + errx(EXIT_FAILURE, emsg, file); + } + + if (r && r[psize - 1] == '\n') + r[psize - 1] = '\0'; + return r; +} + +static void restore_file(const char *file, char **saved, const char *cur) +{ + if (*saved && was_changed(*saved, cur)) { + if (tracefs_instance_file_write(NULL, file, *saved) < 0) + warnx("Failed to restore %s to %s!", file, *saved); + else if (verbose_ftrace()) + printf("Restored %s to %s\n", file, *saved); + free(*saved); + *saved = NULL; + } +} + +static void restore_ftrace(void) +{ + mutex_lock(&save_state.mutex); + + restore_file(TR_CURRENT, &save_state.tracer, current_tracer); + restore_file(TR_THRESH, &save_state.thresh, threshold); + restore_trace_opts(&save_state, use_options); + + mutex_unlock(&save_state.mutex); +} + +static void cleanup_exit(int status) +{ + char *maxlat; + + if (!setup_ftrace) + exit(status); + + /* + * We try the print_mtx for 1 sec in order to avoid garbled + * output if possible, but if it cannot be obtained we proceed anyway. + */ + mutex_trylock_limit(&print_mtx, TRY_PRINTMUTEX_MS); + + maxlat = read_file(TR_MAXLAT, ERR_WARN); + if (maxlat) { + printf("The maximum detected latency was: %sus\n", maxlat); + free(maxlat); + } + + restore_ftrace(); + /* + * We do not need to unlock the print_mtx here because we will exit at + * the end of this function. Unlocking print_mtx causes problems if a + * print thread happens to be waiting for the mutex because we have + * just changed the ftrace settings to the original and thus the + * print thread would output incorrect data from ftrace. + */ + exit(status); +} + +static void init_save_state(void) +{ + pthread_mutexattr_t mattr; + + mutexattr_init(&mattr); + mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE); + mutex_init(&save_state.mutex, &mattr); + mutexattr_destroy(&mattr); + + save_state.tracer = NULL; + save_state.thresh = NULL; + save_state.opt_valid[OPTIDX_FUNC_TR] = false; + save_state.opt_valid[OPTIDX_DISP_GR] = false; +} + +static int printstate_next_ticket(struct entry *req) +{ + int r; + + r = ++(printstate.ticket_counter); + req->ticket = r; + req->ticket_completed_ref = printstate.ticket_completed; + cond_broadcast(&printstate.cond); + return r; +} + +static __always_inline +void printstate_mark_req_completed(const struct entry *req) +{ + if (req->ticket > printstate.ticket_completed) + printstate.ticket_completed = req->ticket; +} + +static __always_inline +bool printstate_has_new_req_arrived(const struct entry *req) +{ + return (printstate.ticket_counter != req->ticket); +} + +static __always_inline int printstate_cnt_inc(void) +{ + int value; + + mutex_lock(&printstate.cnt_mutex); + value = ++printstate.cnt; + mutex_unlock(&printstate.cnt_mutex); + return value; +} + +static __always_inline int printstate_cnt_dec(void) +{ + int value; + + mutex_lock(&printstate.cnt_mutex); + value = --printstate.cnt; + mutex_unlock(&printstate.cnt_mutex); + return value; +} + +static __always_inline int printstate_cnt_read(void) +{ + int value; + + mutex_lock(&printstate.cnt_mutex); + value = printstate.cnt; + mutex_unlock(&printstate.cnt_mutex); + return value; +} + +static __always_inline +bool prev_req_won_race(const struct entry *req) +{ + return (printstate.ticket_completed != req->ticket_completed_ref); +} + +static void sleeptable_resize(int size, bool printout, struct short_msg *msg) +{ + int bytes; + + if (printout) { + msg->len = 0; + if (unlikely(size > PROB_TABLE_MAX_SIZE)) + bytes = snprintf(msg->buf, sizeof(msg->buf), +"Cannot increase probability table to %d (maximum size reached)\n", size); + else + bytes = snprintf(msg->buf, sizeof(msg->buf), +"Increasing probability table to %d\n", size); + if (bytes < 0) + warn("snprintf() failed"); + else + msg->len = bytes; + } + + if (unlikely(size < 0)) { + /* Should never happen */ + warnx("Bad program state at %s:%d", __FILE__, __LINE__); + cleanup_exit(EXIT_FAILURE); + return; + } + sleeptable.size = size; + sleeptable.table = &probabilities[PROB_TABLE_MAX_SIZE - size]; +} + +static void init_probabilities(void) +{ + int i; + int j = 1000; + + for (i = 0; i < PROB_TABLE_MAX_SIZE; i++) { + probabilities[i] = 1000 / j; + j--; + } + mutex_init(&sleeptable.mutex, NULL); +} + +static int table_get_probability(const struct entry *req, + struct short_msg *msg) +{ + int diff = req->ticket - req->ticket_completed_ref; + int rval = 0; + + msg->len = 0; + diff--; + /* Should never happen...*/ + if (unlikely(diff < 0)) { + warnx("Programmer assumption error at %s:%d\n", __FILE__, + __LINE__); + cleanup_exit(EXIT_FAILURE); + } + mutex_lock(&sleeptable.mutex); + if (diff >= (sleeptable.size - 1)) { + rval = sleeptable.table[sleeptable.size - 1]; + sleeptable_resize(sleeptable.size + 1, verbose_sizechange(), + msg); + } else { + rval = sleeptable.table[diff]; + } + mutex_unlock(&sleeptable.mutex); + return rval; +} + +static void init_queue(struct queue *q) +{ + q->next_prod_idx = 0; + q->next_cons_idx = 0; + mutex_init(&q->mutex, NULL); + errno = pthread_cond_init(&q->cond, NULL); + if (errno) + err(EXIT_FAILURE, "pthread_cond_init() failed"); +} + +static __always_inline int queue_len(const struct queue *q) +{ + if (q->next_prod_idx >= q->next_cons_idx) + return q->next_prod_idx - q->next_cons_idx; + else + return QUEUE_SIZE - q->next_cons_idx + q->next_prod_idx; +} + +static __always_inline int queue_nr_free(const struct queue *q) +{ + int nr_free = QUEUE_SIZE - queue_len(q); + + /* + * If there is only one slot left we will anyway lie and claim that the + * queue is full because adding an element will make it appear empty + */ + if (nr_free == 1) + nr_free = 0; + return nr_free; +} + +static __always_inline void queue_idx_inc(int *idx) +{ + *idx = (*idx + 1) % QUEUE_SIZE; +} + +static __always_inline void queue_push_to_back(struct queue *q, + const struct entry *e) +{ + q->entries[q->next_prod_idx] = *e; + queue_idx_inc(&q->next_prod_idx); +} + +static __always_inline struct entry queue_pop_from_front(struct queue *q) +{ + struct entry e = q->entries[q->next_cons_idx]; + + queue_idx_inc(&q->next_cons_idx); + return e; +} + +static __always_inline void queue_cond_signal(struct queue *q) +{ + cond_signal(&q->cond); +} + +static __always_inline void queue_cond_wait(struct queue *q) +{ + cond_wait(&q->cond, &q->mutex); +} + +static __always_inline int queue_try_to_add_entry(struct queue *q, + const struct entry *e) +{ + int r = 0; + + mutex_lock(&q->mutex); + if (queue_nr_free(q) > 0) { + queue_push_to_back(q, e); + cond_signal(&q->cond); + } else + r = -1; + mutex_unlock(&q->mutex); + return r; +} + +static struct entry queue_wait_for_entry(struct queue *q) +{ + struct entry e; + + mutex_lock(&q->mutex); + while (true) { + if (queue_len(&printqueue) > 0) { + e = queue_pop_from_front(q); + break; + } + queue_cond_wait(q); + } + mutex_unlock(&q->mutex); + + return e; +} + +static const struct policy *policy_from_name(const char *name) +{ + const struct policy *p = &policies[0]; + + while (p->name != NULL) { + if (!strcmp(name, p->name)) + return p; + p++; + } + return NULL; +} + +static const char *policy_name(int policy) +{ + const struct policy *p = &policies[0]; + static const char *rval = "unknown"; + + while (p->name != NULL) { + if (p->policy == policy) + return p->name; + p++; + } + return rval; +} + +static bool is_relevant_tracer(const char *name) +{ + unsigned int i; + + for (i = 0; relevant_tracers[i]; i++) + if (!strcmp(name, relevant_tracers[i])) + return true; + return false; +} + +static bool random_makes_sense(const char *name) +{ + unsigned int i; + + for (i = 0; random_tracers[i]; i++) + if (!strcmp(name, random_tracers[i])) + return true; + return false; +} + +static void show_available(void) +{ + char **tracers; + int found = 0; + int i; + + tracers = tracefs_tracers(NULL); + for (i = 0; tracers && tracers[i]; i++) { + if (is_relevant_tracer(tracers[i])) + found++; + } + + if (!tracers) { + warnx(no_tracer_msg); + return; + } + + if (!found) { + warnx(no_latency_tr_msg); + tracefs_list_free(tracers); + return; + } + + printf("The following latency tracers are available on your system:\n"); + for (i = 0; tracers[i]; i++) { + if (is_relevant_tracer(tracers[i])) + printf("%s\n", tracers[i]); + } + tracefs_list_free(tracers); +} + +static bool tracer_valid(const char *name, bool *notracer) +{ + char **tracers; + int i; + bool rval = false; + + *notracer = false; + tracers = tracefs_tracers(NULL); + if (!tracers) { + *notracer = true; + return false; + } + for (i = 0; tracers[i]; i++) + if (!strcmp(tracers[i], name)) { + rval = true; + break; + } + tracefs_list_free(tracers); + return rval; +} + +static const char *find_default_tracer(void) +{ + int i; + bool notracer; + bool valid; + + for (i = 0; relevant_tracers[i]; i++) { + valid = tracer_valid(relevant_tracers[i], ¬racer); + if (notracer) + errx(EXIT_FAILURE, no_tracer_msg); + if (valid) + return relevant_tracers[i]; + } + return NULL; +} + +static bool toss_coin(struct drand48_data *buffer, unsigned int prob) +{ + long r; + + if (unlikely(lrand48_r(buffer, &r))) { + warnx("lrand48_r() failed"); + cleanup_exit(EXIT_FAILURE); + } + r = r % 1000L; + if (r < prob) + return true; + else + return false; +} + + +static long go_to_sleep(const struct entry *req) +{ + struct timespec future; + long delay = sleep_time; + + get_time_in_future(&future, delay); + + mutex_lock(&printstate.mutex); + while (!printstate_has_new_req_arrived(req)) { + cond_timedwait(&printstate.cond, &printstate.mutex, &future); + if (time_has_passed(&future)) + break; + }; + + if (printstate_has_new_req_arrived(req)) + delay = -1; + mutex_unlock(&printstate.mutex); + + return delay; +} + + +static void set_priority(void) +{ + int r; + pid_t pid; + struct sched_param param; + + memset(¶m, 0, sizeof(param)); + param.sched_priority = sched_pri; + + pid = getpid(); + r = sched_setscheduler(pid, sched_policy, ¶m); + + if (r != 0) + err(EXIT_FAILURE, "sched_setscheduler() failed"); +} + +pid_t latency_collector_gettid(void) +{ + return (pid_t) syscall(__NR_gettid); +} + +static void print_priority(void) +{ + pid_t tid; + int policy; + int r; + struct sched_param param; + + tid = latency_collector_gettid(); + r = pthread_getschedparam(pthread_self(), &policy, ¶m); + if (r != 0) { + warn("pthread_getschedparam() failed"); + cleanup_exit(EXIT_FAILURE); + } + mutex_lock(&print_mtx); + printf("Thread %d runs with scheduling policy %s and priority %d\n", + tid, policy_name(policy), param.sched_priority); + mutex_unlock(&print_mtx); +} + +static __always_inline +void __print_skipmessage(const struct short_msg *resize_msg, + const struct timespec *timestamp, char *buffer, + size_t bufspace, const struct entry *req, bool excuse, + const char *str) +{ + ssize_t bytes = 0; + char *p = &buffer[0]; + long us, sec; + int r; + + sec = timestamp->tv_sec; + us = timestamp->tv_nsec / 1000; + + if (resize_msg != NULL & |
