The stable surface that user code imports and calls. Everything on this page is covered by TraceML's compatibility contract across v0.x minor releases.
@contextmanager
def trace_step(model: nn.Module):
    """Mark the boundary of one training step for TraceML.

    Wraps the caller's step body in the internal step timer plus the
    forward/backward auto-timers. When the body completes without raising,
    the session step counter advances, step memory is recorded, and step
    events are flushed. Instrumentation errors are logged, never raised.
    """
    if _traceml_disabled():
        # TraceML is off: act as a transparent no-op context manager.
        yield
        return

    trace_state = get_trace_session_state()
    mem_tracker = StepMemoryTracker(model)
    step_completed = False

    # Best-effort reset — tracing must never break the training loop.
    try:
        mem_tracker.reset()
    except Exception as exc:
        _log_instrumentation_error("reset failed", exc)

    try:
        with timed_region("_traceml_internal:step_time", scope="step", use_gpu=False):
            with forward_auto_timer(), backward_auto_timer():
                if _should_auto_install_optimizer_timing():
                    ensure_optimizer_timing_installed()
                yield
        step_completed = True
    finally:
        # Only advance/record/flush for steps that finished cleanly, so a
        # raising step body does not skew step-aligned metrics.
        if step_completed:
            trace_state.advance_step()
            try:
                mem_tracker.record()
            except Exception as exc:
                _log_instrumentation_error("record failed", exc)
            try:
                flush_step_events(model, trace_state.step)
            except Exception as exc:
                _log_instrumentation_error("flush failed", exc)
This is primarily used by the deep profile and integration layers for
model-level hook attachment. It is independent of the automatic patch
policy configured by traceml.init(...).
def trace_model_instance(
    model: nn.Module,
    sample_layer_memory: bool = True,
    trace_layer_forward_memory: bool = True,
    trace_layer_backward_memory: bool = True,
    trace_layer_forward_time: bool = True,
    trace_layer_backward_time: bool = True,
    trace_execution: bool = True,
    include_names: Optional[List[str]] = None,
    exclude_names: Optional[List[str]] = None,
    leaf_only: bool = True,
) -> None:
    """
    Manually trace a PyTorch model instance.

    This is primarily used by the deep profile and integration layers for
    model-level hook attachment. It is independent of the automatic patch
    policy configured by `traceml.init(...)`.
    """
    # Hook attachment only applies when TraceML is on and in "deep" profile.
    if _traceml_disabled() or _traceml_profile() != "deep":
        return

    try:
        if not isinstance(model, nn.Module):
            raise TypeError("trace_model_instance expects an nn.Module.")

        # Shared layer-filter settings for every hook-attachment call below.
        layer_filter = dict(
            include_names=include_names,
            exclude_names=exclude_names,
            leaf_only=leaf_only,
        )

        if sample_layer_memory:
            # Stash the filter on the model so downstream samplers can reuse
            # it, then publish a one-shot parameter-memory snapshot.
            model._traceml_include_names = include_names
            model._traceml_exclude_names = exclude_names
            model._traceml_leaf_only = leaf_only
            model_queue.put(collect_layer_parameter_memory(model))

        if trace_layer_forward_memory:
            attach_layer_forward_memory_hooks(model, **layer_filter)
        if trace_layer_backward_memory:
            attach_layer_backward_memory_hooks(model, **layer_filter)
        if trace_layer_forward_time:
            attach_layer_forward_time_hooks(model, **layer_filter)
        if trace_layer_backward_time:
            attach_layer_backward_time_hooks(model, **layer_filter)
        if trace_execution:
            attach_execution_entry_hooks(model)
    except Exception as exc:
        _log_instrumentation_error(
            "Failed to trace model instance",
            exc,
        )
A subclass of Hugging Face's Trainer that automatically integrates TraceML.
This class wraps the training_step with the trace_step context manager
to capture step-level metrics (timing, memory, etc.).
Source code in src/traceml/integrations/huggingface.py
(lines 30–48)
def __init__(
    self,
    *args,
    traceml_enabled: bool = True,
    traceml_kwargs: Optional[Dict[str, Any]] = None,
    **kwargs,
):
    """Construct the underlying HF Trainer and store TraceML settings.

    `traceml_kwargs`, when provided, holds Deep-Dive model-tracing options;
    hooks are attached lazily on the first training step, not here.
    """
    if not HAS_TRANSFORMERS:
        raise ImportError(
            "TraceMLTrainer requires 'transformers' to be installed. "
            "Please run `pip install transformers`."
        )
    super().__init__(*args, **kwargs)
    self.traceml_enabled = traceml_enabled
    # If model-level tracing (Deep-Dive) is requested, apply it now
    self.traceml_kwargs = traceml_kwargs
    self._traceml_hooks_attached = False
def training_step(self, model, inputs, *args, **kwargs) -> Any:
    """
    Overridden training step to include TraceML instrumentation.

    Fix: the original re-checked `if self.traceml_enabled:` after the
    bypass guard (always true at that point) and carried an unreachable
    trailing return; both are removed with no behavior change.
    """
    # BYPASS LOGIC:
    # If the user launched the script with `--disable-traceml` (setting
    # TRACEML_DISABLED="1") or if `traceml_enabled` is explicitly False,
    # short-circuit immediately — no hooks, no memory tracking, no timing.
    if TRACEML_DISABLED or not self.traceml_enabled:
        return super().training_step(model, inputs, *args, **kwargs)

    # Lazily attach hooks on the first step — or whenever the model object
    # changes identity (e.g. DDP/Accelerator wrapping) — so we instrument
    # the final wrapped/moved model.
    if self.traceml_kwargs is not None and (
        not self._traceml_hooks_attached
        or id(model) != getattr(self, "_attached_model_id", None)
    ):
        try:
            trace_model_instance(model, **self.traceml_kwargs)
            self._attached_model_id = id(model)
            self._traceml_hooks_attached = True
            logger.info("[TraceML] Deep-Dive model tracing initialized (lazy).")
        except Exception as e:
            logger.error(f"[TraceML] Failed to initialize model tracing: {e}")

    # Run the real HF training step inside a TraceML step boundary.
    with trace_step(model):
        return super().training_step(model, inputs, *args, **kwargs)
Captures full step time (forward + backward + optimizer) as well as
individual phase timings. Safely handles gradient accumulation by
treating each micro-batch as a step, providing 0-duration optimizer
events on accumulating steps to preserve dashboard step alignment.
Source code in src/traceml/integrations/lightning.py
TraceML ships with a CLI entry point installed as traceml.
traceml watch <script>   # run script with live terminal dashboard
traceml run <script>     # run script with minimal instrumentation
traceml deep <script>    # run with full instrumentation (step + memory + layer)