Public API

The stable surface that user code imports and calls. Everything on this page is covered by TraceML's compatibility contract across v0.x minor releases.

Decorators

traceml.decorators.trace_step

trace_step(model: Module)

Define a single training step boundary.

Source code in src/traceml/sdk/instrumentation.py
@contextmanager
def trace_step(model: nn.Module):
    """Define a single training step boundary."""
    # Runtime kill switch: behave as a transparent no-op context manager.
    if _traceml_disabled():
        yield
        return

    trace_state = get_trace_session_state()
    mem_tracker = StepMemoryTracker(model)
    step_completed = False

    try:
        mem_tracker.reset()
    except Exception as exc:
        _log_instrumentation_error("reset failed", exc)

    try:
        with timed_region(
            "_traceml_internal:step_time", scope="step", use_gpu=False
        ):
            with forward_auto_timer(), backward_auto_timer():
                if _should_auto_install_optimizer_timing():
                    ensure_optimizer_timing_installed()
                yield
                step_completed = True
    finally:
        # Only advance the step counter when the wrapped body ran to completion.
        if step_completed:
            trace_state.advance_step()

        try:
            mem_tracker.record()
        except Exception as exc:
            _log_instrumentation_error("record failed", exc)

        try:
            flush_step_events(model, trace_state.step)
        except Exception as exc:
            _log_instrumentation_error("flush failed", exc)
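
A minimal usage sketch: wrap the body of a training loop in trace_step so each iteration is bracketed as one step. The toy model, data, and optimizer here are placeholders, and traceml.init() is assumed to accept a call with defaults:

import torch
import torch.nn as nn
import traceml
from traceml.decorators import trace_step

traceml.init()  # assumption: init() can be called with defaults

model = nn.Linear(16, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for _ in range(10):
    batch = torch.randn(8, 16)
    with trace_step(model):  # brackets one training step
        loss = model(batch).pow(2).mean()
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()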

traceml.decorators.trace_model_instance

trace_model_instance(model: Module, sample_layer_memory: bool = True, trace_layer_forward_memory: bool = True, trace_layer_backward_memory: bool = True, trace_layer_forward_time: bool = True, trace_layer_backward_time: bool = True, trace_execution: bool = True, include_names: Optional[List[str]] = None, exclude_names: Optional[List[str]] = None, leaf_only: bool = True) -> None

Manually trace a PyTorch model instance.

This is primarily used by the deep profile and integration layers for model-level hook attachment. It is independent of the automatic patch policy configured by traceml.init(...).

Source code in src/traceml/sdk/instrumentation.py
def trace_model_instance(
    model: nn.Module,
    sample_layer_memory: bool = True,
    trace_layer_forward_memory: bool = True,
    trace_layer_backward_memory: bool = True,
    trace_layer_forward_time: bool = True,
    trace_layer_backward_time: bool = True,
    trace_execution: bool = True,
    include_names: Optional[List[str]] = None,
    exclude_names: Optional[List[str]] = None,
    leaf_only: bool = True,
) -> None:
    """
    Manually trace a PyTorch model instance.

    This is primarily used by the deep profile and integration layers for
    model-level hook attachment. It is independent of the automatic patch
    policy configured by `traceml.init(...)`.
    """
    # Model-level hooks only apply in the "deep" profile; otherwise no-op.
    if _traceml_disabled() or _traceml_profile() != "deep":
        return

    try:
        if not isinstance(model, nn.Module):
            raise TypeError("trace_model_instance expects an nn.Module.")

        if sample_layer_memory:
            # Record filter settings on the module, then snapshot and
            # enqueue per-layer parameter memory.
            model._traceml_include_names = include_names
            model._traceml_exclude_names = exclude_names
            model._traceml_leaf_only = leaf_only
            layer_memory = collect_layer_parameter_memory(model)
            model_queue.put(layer_memory)

        if trace_layer_forward_memory:
            attach_layer_forward_memory_hooks(
                model,
                include_names=include_names,
                exclude_names=exclude_names,
                leaf_only=leaf_only,
            )

        if trace_layer_backward_memory:
            attach_layer_backward_memory_hooks(
                model,
                include_names=include_names,
                exclude_names=exclude_names,
                leaf_only=leaf_only,
            )

        if trace_layer_forward_time:
            attach_layer_forward_time_hooks(
                model,
                include_names=include_names,
                exclude_names=exclude_names,
                leaf_only=leaf_only,
            )

        if trace_layer_backward_time:
            attach_layer_backward_time_hooks(
                model,
                include_names=include_names,
                exclude_names=exclude_names,
                leaf_only=leaf_only,
            )

        if trace_execution:
            attach_execution_entry_hooks(model)

    except Exception as exc:
        _log_instrumentation_error(
            "Failed to trace model instance",
            exc,
        )
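
A short sketch of manual attachment. Per the guard above, this is a no-op unless TraceML is enabled and the active profile is "deep"; the toy model is a placeholder:

import torch.nn as nn
from traceml.decorators import trace_model_instance

model = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 1))

# Attach hooks to leaf modules only; skip execution-entry hooks for this run.
trace_model_instance(
    model,
    trace_execution=False,
    leaf_only=True,
)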

Hugging Face integration

traceml.integrations.huggingface.TraceMLTrainer

TraceMLTrainer(*args, traceml_enabled: bool = True, traceml_kwargs: Optional[Dict[str, Any]] = None, **kwargs)

Bases: Trainer (falls back to object when transformers is not installed)

A subclass of Hugging Face's Trainer that automatically integrates TraceML.

This class wraps training_step in the trace_step context manager to capture step-level metrics (timing, memory, etc.).

Source code in src/traceml/integrations/huggingface.py
def __init__(
    self,
    *args,
    traceml_enabled: bool = True,
    traceml_kwargs: Optional[Dict[str, Any]] = None,
    **kwargs,
):
    if not HAS_TRANSFORMERS:
        raise ImportError(
            "TraceMLTrainer requires 'transformers' to be installed. "
            "Please run `pip install transformers`."
        )

    super().__init__(*args, **kwargs)
    self.traceml_enabled = traceml_enabled

    # Deep-Dive model tracing is applied lazily on the first training
    # step, not here; only record the requested kwargs for later.
    self.traceml_kwargs = traceml_kwargs
    self._traceml_hooks_attached = False

training_step

training_step(model, inputs, *args, **kwargs) -> Any

Overridden training step to include TraceML instrumentation.

Source code in src/traceml/integrations/huggingface.py
def training_step(self, model, inputs, *args, **kwargs) -> Any:
    """
    Overridden training step to include TraceML instrumentation.
    """
    # BYPASS LOGIC:
    # If the user launched the script with `--disable-traceml` (setting
    # TRACEML_DISABLED="1") or if `traceml_enabled` is explicitly False,
    # we short-circuit immediately. This skips all hook attachment,
    # memory tracking, and timing regions.
    if TRACEML_DISABLED or not self.traceml_enabled:
        return super().training_step(model, inputs, *args, **kwargs)

    # Lazily attach hooks on the first step to ensure we catch the
    # final wrapped/moved model (e.g. DDP, Accelerator).
    if self.traceml_kwargs is not None and (
        not self._traceml_hooks_attached
        or id(model) != getattr(self, "_attached_model_id", None)
    ):
        try:
            trace_model_instance(model, **self.traceml_kwargs)
            self._attached_model_id = id(model)
            self._traceml_hooks_attached = True
            logger.info(
                "[TraceML] Deep-Dive model tracing initialized (lazy)."
            )
        except Exception as e:
            logger.error(
                f"[TraceML] Failed to initialize model tracing: {e}"
            )

    with trace_step(model):
        return super().training_step(model, inputs, *args, **kwargs)
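
A drop-in sketch following the usual transformers Trainer setup; model and train_dataset are placeholders, and traceml_kwargs is forwarded verbatim to trace_model_instance on the first step:

from transformers import TrainingArguments
from traceml.integrations.huggingface import TraceMLTrainer

trainer = TraceMLTrainer(
    model=model,                          # placeholder: any Trainer-compatible model
    args=TrainingArguments(output_dir="traceml-out"),
    train_dataset=train_dataset,          # placeholder dataset
    traceml_kwargs={"leaf_only": True},   # forwarded to trace_model_instance
)
trainer.train()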

PyTorch Lightning integration

traceml.integrations.lightning.TraceMLCallback

TraceMLCallback()

Bases: Callback

Official TraceML Callback for PyTorch Lightning.

Captures full step time (forward + backward + optimizer) as well as individual phase timings. Handles gradient accumulation safely by treating each micro-batch as a step and emitting zero-duration optimizer events on accumulating steps to keep dashboard steps aligned.

Source code in src/traceml/integrations/lightning.py
def __init__(self):
    super().__init__()
    self._traceml_step_ctx = None
    self._forward_ctx = None
    self._backward_ctx = None
    self._optimizer_ctx = None

    self._mem_tracker = None
    self._opt_step_occurred = False
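
Registration follows the standard Lightning callback pattern. A minimal sketch, where lit_module and train_loader are placeholders and the import alias depends on your Lightning version:

import pytorch_lightning as pl  # or: import lightning.pytorch as pl
from traceml.integrations.lightning import TraceMLCallback

trainer = pl.Trainer(max_epochs=1, callbacks=[TraceMLCallback()])
trainer.fit(lit_module, train_dataloaders=train_loader)  # placeholders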

CLI

TraceML ships with a CLI entry point installed as traceml.

traceml watch <script>    # run script with live terminal dashboard
traceml run <script>      # run script with minimal instrumentation
traceml deep <script>     # run with full instrumentation (step + memory + layer)

See traceml --help for the full set of options.