diff options
| -rw-r--r-- | mini-scalene.py | 78 |
1 files changed, 58 insertions, 20 deletions
diff --git a/mini-scalene.py b/mini-scalene.py index 5d8f6b2..e957bb0 100644 --- a/mini-scalene.py +++ b/mini-scalene.py @@ -1,15 +1,27 @@ import sys import argparse +import os import threading import traceback -import runpy import atexit import signal import asyncio +import time from typing import cast from types import FrameType from collections import defaultdict +the_globals = { + '__name__': '__main__', + '__doc__': None, + '__package__': None, + '__loader__': globals()['__loader__'], + '__spec__': None, + '__annotations__': {}, + '__builtins__': globals()['__builtins__'], + '__file__': None, + '__cached__': None, +} def parse_arguments(): '''Parse CLI args''' @@ -27,7 +39,7 @@ def parse_arguments(): return parser.parse_args() -class mini_scalene: +class mini_scalene(object): '''A stripped-down version of SCALENE which tallies active lines during execution.''' @@ -35,10 +47,15 @@ class mini_scalene: # mini_scalene.frame_to_string) and values represent number of times # sampled. cpu_samples = defaultdict(lambda: 0) + cpu_samples_c = defaultdict(lambda: 0) + # number of times samples have been collected total_cpu_samples = 0 # the time, in seconds, between samples signal_interval = 0.01 + # the timestamp recorded last signal + last_signal_time = 0.0 + # if we should try to profile asynchronous code. Used to observe # effectiveness of the implementation. profile_async = True @@ -49,6 +66,12 @@ class mini_scalene: signal.setitimer(signal.ITIMER_PROF, self.signal_interval, self.signal_interval) + mini_scalene.last_signal_time = mini_scalene.gettime() + + @staticmethod + def gettime(): + '''get the wallclock time''' + return time.perf_counter() @staticmethod def start(profile_async): @@ -57,22 +80,20 @@ class mini_scalene: @staticmethod def exit_handler(): - '''Turn off our profiling signals and pretty-print profiling information.''' + '''Turn off profiling signals & pretty-print profiling information.''' mini_scalene.disable_signals() # If we've collected any samples, dump them. - print("CPU usage:") + print("CPU usage (Python):") if mini_scalene.total_cpu_samples > 0: - # Sort the samples in descending order by number of samples. - mini_scalene.cpu_samples = {k: v for k, v in sorted( - mini_scalene.cpu_samples.items(), key=lambda item: item[1], - reverse=True)} - for key in mini_scalene.cpu_samples: - print(key + " : " + - str(mini_scalene.cpu_samples[key] * - 100 / mini_scalene.total_cpu_samples) + - "%" + " (" + - str(mini_scalene.cpu_samples[key]) + - " total samples)") + for key in mini_scalene.sort_samples(mini_scalene.cpu_samples): + print(f"{key} : " + f"{mini_scalene.cpu_samples[key] * 100 / mini_scalene.total_cpu_samples:.3f} % " + f"({mini_scalene.cpu_samples[key]:.1f} total samples)") + print("CPU usage (Native):") + for key in mini_scalene.sort_samples(mini_scalene.cpu_samples_c): + print(f"{key} : " + f"{mini_scalene.cpu_samples_c[key] * 100 / mini_scalene.total_cpu_samples:.3f} % " + f"({mini_scalene.cpu_samples_c[key]:.1f} total samples)") else: print("(did not run long enough to profile)") @@ -84,11 +105,20 @@ class mini_scalene: @staticmethod def cpu_signal_handler(sig, frame): + elapsed_since_last_signal = mini_scalene.gettime() - \ + mini_scalene.last_signal_time + c_time_norm = (elapsed_since_last_signal - + mini_scalene.signal_interval) / \ + mini_scalene.signal_interval + keys = mini_scalene.compute_frames_to_record(frame) for key in keys: mini_scalene.cpu_samples[mini_scalene.frame_to_string(key)] += 1 - mini_scalene.total_cpu_samples += 1 - return + mini_scalene.cpu_samples_c[mini_scalene.frame_to_string( + key)] += c_time_norm + mini_scalene.total_cpu_samples += elapsed_since_last_signal / \ + mini_scalene.signal_interval + mini_scalene.last_signal_time = mini_scalene.gettime() @staticmethod def compute_frames_to_record(this_frame): @@ -162,6 +192,13 @@ class mini_scalene: return asyncio.get_event_loop_policy()._local._loop is not None @staticmethod + def sort_samples(sample_dict): + '''Returns SAMPLE_DICT in descending order by number of samples.''' + return {k: v for k, v in sorted(sample_dict.items(), + key=lambda item: item[1], + reverse=True)} + + @staticmethod def filter_duplicated_frames(frames) -> bool: s = set() dup = [] @@ -183,11 +220,12 @@ class mini_scalene: def main(): args = parse_arguments() - mini_scalene().start(args.async_off) - sys.argv = [args.script] + args.s_args try: - runpy.run_path(args.script, run_name="__main__") + with open(args.script, 'rb') as fp: + code = compile(fp.read(), args.script, "exec") + mini_scalene().start(args.async_off) + exec(code, the_globals) except Exception: traceback.print_exc() |
