summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbd <bdunahu@operationnull.com>2025-06-10 19:05:34 -0400
committerbd <bdunahu@operationnull.com>2025-06-10 19:05:34 -0400
commitf4b54b5a2e99be859558331186f725fbfa224594 (patch)
treec6b51ee56cffaa686ad26be972b6fd166f5687ea
parentf7c41ef8bb7993d09aa54cd9b3773a55a662ffb2 (diff)
Profile native time (plausible results)
-rw-r--r--mini-scalene.py78
1 files changed, 58 insertions, 20 deletions
diff --git a/mini-scalene.py b/mini-scalene.py
index 5d8f6b2..e957bb0 100644
--- a/mini-scalene.py
+++ b/mini-scalene.py
@@ -1,15 +1,27 @@
import sys
import argparse
+import os
import threading
import traceback
-import runpy
import atexit
import signal
import asyncio
+import time
from typing import cast
from types import FrameType
from collections import defaultdict
+the_globals = {
+ '__name__': '__main__',
+ '__doc__': None,
+ '__package__': None,
+ '__loader__': globals()['__loader__'],
+ '__spec__': None,
+ '__annotations__': {},
+ '__builtins__': globals()['__builtins__'],
+ '__file__': None,
+ '__cached__': None,
+}
def parse_arguments():
'''Parse CLI args'''
@@ -27,7 +39,7 @@ def parse_arguments():
return parser.parse_args()
-class mini_scalene:
+class mini_scalene(object):
'''A stripped-down version of SCALENE which tallies active lines during
execution.'''
@@ -35,10 +47,15 @@ class mini_scalene:
# mini_scalene.frame_to_string) and values represent number of times
# sampled.
cpu_samples = defaultdict(lambda: 0)
+ cpu_samples_c = defaultdict(lambda: 0)
+ # total samples collected, weighted by elapsed time in units of signal_interval (may be fractional)
total_cpu_samples = 0
# the time, in seconds, between samples
signal_interval = 0.01
+ # the timestamp recorded at the last signal (seeded when the timer is armed)
+ last_signal_time = 0.0
+
# if we should try to profile asynchronous code. Used to observe
# effectiveness of the implementation.
profile_async = True
@@ -49,6 +66,12 @@ class mini_scalene:
signal.setitimer(signal.ITIMER_PROF,
self.signal_interval,
self.signal_interval)
+ mini_scalene.last_signal_time = mini_scalene.gettime()
+
+ @staticmethod
+ def gettime():
+ '''get the wallclock time'''
+ return time.perf_counter()
@staticmethod
def start(profile_async):
@@ -57,22 +80,20 @@ class mini_scalene:
@staticmethod
def exit_handler():
- '''Turn off our profiling signals and pretty-print profiling information.'''
+ '''Turn off profiling signals & pretty-print profiling information.'''
mini_scalene.disable_signals()
# If we've collected any samples, dump them.
- print("CPU usage:")
+ print("CPU usage (Python):")
if mini_scalene.total_cpu_samples > 0:
- # Sort the samples in descending order by number of samples.
- mini_scalene.cpu_samples = {k: v for k, v in sorted(
- mini_scalene.cpu_samples.items(), key=lambda item: item[1],
- reverse=True)}
- for key in mini_scalene.cpu_samples:
- print(key + " : " +
- str(mini_scalene.cpu_samples[key] *
- 100 / mini_scalene.total_cpu_samples) +
- "%" + " (" +
- str(mini_scalene.cpu_samples[key]) +
- " total samples)")
+ for key in mini_scalene.sort_samples(mini_scalene.cpu_samples):
+ print(f"{key} : "
+ f"{mini_scalene.cpu_samples[key] * 100 / mini_scalene.total_cpu_samples:.3f} % "
+ f"({mini_scalene.cpu_samples[key]:.1f} total samples)")
+ print("CPU usage (Native):")
+ for key in mini_scalene.sort_samples(mini_scalene.cpu_samples_c):
+ print(f"{key} : "
+ f"{mini_scalene.cpu_samples_c[key] * 100 / mini_scalene.total_cpu_samples:.3f} % "
+ f"({mini_scalene.cpu_samples_c[key]:.1f} total samples)")
else:
print("(did not run long enough to profile)")
@@ -84,11 +105,20 @@ class mini_scalene:
@staticmethod
def cpu_signal_handler(sig, frame):
+ elapsed_since_last_signal = mini_scalene.gettime() - \
+ mini_scalene.last_signal_time
+ c_time_norm = (elapsed_since_last_signal -
+ mini_scalene.signal_interval) / \
+ mini_scalene.signal_interval
+
keys = mini_scalene.compute_frames_to_record(frame)
for key in keys:
mini_scalene.cpu_samples[mini_scalene.frame_to_string(key)] += 1
- mini_scalene.total_cpu_samples += 1
- return
+ mini_scalene.cpu_samples_c[mini_scalene.frame_to_string(
+ key)] += c_time_norm
+ mini_scalene.total_cpu_samples += elapsed_since_last_signal / \
+ mini_scalene.signal_interval
+ mini_scalene.last_signal_time = mini_scalene.gettime()
@staticmethod
def compute_frames_to_record(this_frame):
@@ -162,6 +192,13 @@ class mini_scalene:
return asyncio.get_event_loop_policy()._local._loop is not None
@staticmethod
+ def sort_samples(sample_dict):
+ '''Returns SAMPLE_DICT in descending order by number of samples.'''
+ return {k: v for k, v in sorted(sample_dict.items(),
+ key=lambda item: item[1],
+ reverse=True)}
+
+ @staticmethod
def filter_duplicated_frames(frames) -> bool:
s = set()
dup = []
@@ -183,11 +220,12 @@ class mini_scalene:
def main():
args = parse_arguments()
- mini_scalene().start(args.async_off)
-
sys.argv = [args.script] + args.s_args
try:
- runpy.run_path(args.script, run_name="__main__")
+ with open(args.script, 'rb') as fp:
+ code = compile(fp.read(), args.script, "exec")
+ mini_scalene().start(args.async_off)
+ exec(code, the_globals)
except Exception:
traceback.print_exc()