"""Source code for ``dagster.core.execution.context.output``."""

import warnings
from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Union, cast

from dagster import check
from dagster.core.definitions.events import (
    AssetKey,
    AssetMaterialization,
    AssetObservation,
    Materialization,
    MetadataEntry,
    PartitionMetadataEntry,
)
from dagster.core.definitions.op_definition import OpDefinition
from dagster.core.definitions.partition_key_range import PartitionKeyRange
from dagster.core.definitions.solid_definition import SolidDefinition
from dagster.core.definitions.time_window_partitions import TimeWindow
from dagster.core.errors import DagsterInvariantViolationError
from dagster.core.execution.plan.utils import build_resources_for_manager

if TYPE_CHECKING:
    from dagster.core.definitions import PipelineDefinition
    from dagster.core.definitions.resource_definition import Resources
    from dagster.core.events import DagsterEvent
    from dagster.core.execution.context.system import StepExecutionContext
    from dagster.core.execution.plan.outputs import StepOutputHandle
    from dagster.core.execution.plan.plan import ExecutionPlan
    from dagster.core.log_manager import DagsterLogManager
    from dagster.core.system_config.objects import ResolvedRunConfig
    from dagster.core.types.dagster_type import DagsterType

# Sentinel run id used for contexts constructed outside of a real pipeline run
# (it is the default ``run_id`` supplied by ``build_output_context`` below).
RUN_ID_PLACEHOLDER = "__EPHEMERAL_RUN_ID"


class OutputContext:
    """
    The context object that is available to the `handle_output` method of an :py:class:`IOManager`.

    Attributes:
        step_key (Optional[str]): The step_key for the compute step that produced the output.
        name (Optional[str]): The name of the output that produced the output.
        pipeline_name (Optional[str]): The name of the pipeline definition.
        run_id (Optional[str]): The id of the run that produced the output.
        metadata (Optional[Dict[str, Any]]): A dict of the metadata that is assigned to the
            OutputDefinition that produced the output.
        mapping_key (Optional[str]): The key that identifies a unique mapped output. None for regular outputs.
        config (Optional[Any]): The configuration for the output.
        solid_def (Optional[SolidDefinition]): The definition of the solid that produced the output.
        dagster_type (Optional[DagsterType]): The type of this output.
        log (Optional[DagsterLogManager]): The log manager to use for this output.
        version (Optional[str]): (Experimental) The version of the output.
        resource_config (Optional[Dict[str, Any]]): The config associated with the resource that
            initializes the RootInputManager.
        resources (Optional[Resources]): The resources required by the output manager, specified by the
            `required_resource_keys` parameter.
        op_def (Optional[OpDefinition]): The definition of the op that produced the output.
    """

    def __init__(
        self,
        step_key: Optional[str] = None,
        name: Optional[str] = None,
        pipeline_name: Optional[str] = None,
        run_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        mapping_key: Optional[str] = None,
        config: Optional[Any] = None,
        solid_def: Optional["SolidDefinition"] = None,
        dagster_type: Optional["DagsterType"] = None,
        log_manager: Optional["DagsterLogManager"] = None,
        version: Optional[str] = None,
        resource_config: Optional[Dict[str, Any]] = None,
        resources: Optional[Union["Resources", Dict[str, Any]]] = None,
        step_context: Optional["StepExecutionContext"] = None,
        op_def: Optional["OpDefinition"] = None,
    ):
        # Imported locally to avoid circular imports at module load time.
        from dagster.core.definitions.resource_definition import IContainsGenerator, Resources
        from dagster.core.execution.build_resources import build_resources

        self._step_key = step_key
        self._name = name
        self._pipeline_name = pipeline_name
        self._run_id = run_id
        self._metadata = metadata
        self._mapping_key = mapping_key
        self._config = config
        # solid_def and op_def are mutually exclusive aliases; whichever was
        # provided is stored in _solid_def and surfaced via both properties.
        check.invariant(
            solid_def is None or op_def is None, "Can't provide both a solid_def and an op_def arg"
        )
        self._solid_def = solid_def or op_def
        self._dagster_type = dagster_type
        self._log = log_manager
        self._version = version
        self._resource_config = resource_config
        self._step_context = step_context

        if isinstance(resources, Resources):
            # Already-initialized resources: nothing to manage, no context manager.
            self._resources_cm = None
            self._resources = resources
        else:
            # A raw dict (or None) was provided: build the resources ourselves and
            # enter the resulting context manager immediately. __del__/__exit__
            # below are responsible for tearing it down.
            self._resources_cm = build_resources(
                check.opt_dict_param(resources, "resources", key_type=str)
            )
            self._resources = self._resources_cm.__enter__()  # pylint: disable=no-member
            self._resources_contain_cm = isinstance(self._resources, IContainsGenerator)
            self._cm_scope_entered = False

        self._events: List["DagsterEvent"] = []
        self._user_events: List[Union[AssetMaterialization, AssetObservation, Materialization]] = []
        # None until add_output_metadata is called (distinguishes "never set"
        # from "explicitly set to empty").
        self._metadata_entries: Optional[List[Union[MetadataEntry, PartitionMetadataEntry]]] = None

    def __enter__(self):
        # Record that the user opened this context with `with`, so that
        # generator-backed resources may be accessed safely.
        if self._resources_cm:
            self._cm_scope_entered = True
        return self

    def __exit__(self, *exc):
        # Tear down any resources this context built itself.
        if self._resources_cm:
            self._resources_cm.__exit__(*exc)  # pylint: disable=no-member

    def __del__(self):
        # Last-resort cleanup for contexts holding generator-backed resources
        # that were never used as a context manager. Safe because the `and`
        # chain short-circuits when _resources_cm is None (in which case
        # _resources_contain_cm was never assigned).
        if self._resources_cm and self._resources_contain_cm and not self._cm_scope_entered:
            self._resources_cm.__exit__(None, None, None)  # pylint: disable=no-member

    # The properties below raise DagsterInvariantViolationError when the
    # corresponding value was not supplied at construction time, rather than
    # silently returning None.

    @property
    def step_key(self) -> str:
        if self._step_key is None:
            raise DagsterInvariantViolationError(
                "Attempting to access step_key, "
                "but it was not provided when constructing the OutputContext"
            )

        return self._step_key

    @property
    def name(self) -> str:
        if self._name is None:
            raise DagsterInvariantViolationError(
                "Attempting to access name, "
                "but it was not provided when constructing the OutputContext"
            )

        return self._name

    @property
    def pipeline_name(self) -> str:
        if self._pipeline_name is None:
            raise DagsterInvariantViolationError(
                "Attempting to access pipeline_name, "
                "but it was not provided when constructing the OutputContext"
            )

        return self._pipeline_name

    @property
    def run_id(self) -> str:
        if self._run_id is None:
            raise DagsterInvariantViolationError(
                "Attempting to access run_id, "
                "but it was not provided when constructing the OutputContext"
            )

        return self._run_id

    @property
    def metadata(self) -> Optional[Dict[str, Any]]:
        # Metadata assigned to the OutputDefinition, not metadata logged at runtime.
        return self._metadata

    @property
    def mapping_key(self) -> Optional[str]:
        # None for non-dynamic (regular) outputs.
        return self._mapping_key

    @property
    def config(self) -> Any:
        return self._config

    @property
    def solid_def(self) -> "SolidDefinition":
        if self._solid_def is None:
            raise DagsterInvariantViolationError(
                "Attempting to access solid_def, "
                "but it was not provided when constructing the OutputContext"
            )

        return self._solid_def

    @property
    def op_def(self) -> "OpDefinition":
        # Same underlying value as solid_def, narrowed to OpDefinition.
        if self._solid_def is None:
            raise DagsterInvariantViolationError(
                "Attempting to access op_def, "
                "but it was not provided when constructing the OutputContext"
            )

        return cast(OpDefinition, self._solid_def)

    @property
    def dagster_type(self) -> "DagsterType":
        if self._dagster_type is None:
            raise DagsterInvariantViolationError(
                "Attempting to access dagster_type, "
                "but it was not provided when constructing the OutputContext"
            )

        return self._dagster_type

    @property
    def log(self) -> "DagsterLogManager":
        if self._log is None:
            raise DagsterInvariantViolationError(
                "Attempting to access log, "
                "but it was not provided when constructing the OutputContext"
            )

        return self._log

    @property
    def version(self) -> Optional[str]:
        return self._version

    @property
    def resource_config(self) -> Optional[Dict[str, Any]]:
        return self._resource_config

    @property
    def resources(self) -> Any:
        if self._resources is None:
            raise DagsterInvariantViolationError(
                "Attempting to access resources, "
                "but it was not provided when constructing the OutputContext"
            )

        # Guard against reading generator-backed resources outside of a `with`
        # block, where their cleanup could run at an arbitrary time.
        if self._resources_cm and self._resources_contain_cm and not self._cm_scope_entered:
            raise DagsterInvariantViolationError(
                "At least one provided resource is a generator, but attempting to access "
                "resources outside of context manager scope. You can use the following syntax to "
                "open a context manager: `with build_output_context(...) as context:`"
            )
        return self._resources

    @property
    def asset_key(self) -> Optional[AssetKey]:
        # The asset key is derived from the output definition on the solid/op
        # whose name matches this output; exactly one must match.
        matching_output_defs = [
            output_def
            for output_def in cast(SolidDefinition, self._solid_def).output_defs
            if output_def.name == self.name
        ]
        check.invariant(len(matching_output_defs) == 1)
        return matching_output_defs[0].get_asset_key(self)

    @property
    def step_context(self) -> "StepExecutionContext":
        if self._step_context is None:
            raise DagsterInvariantViolationError(
                "Attempting to access step_context, "
                "but it was not provided when constructing the OutputContext"
            )

        return self._step_context

    @property
    def has_partition_key(self) -> bool:
        """Whether the current run is a partitioned run"""
        return self.step_context.has_partition_key

    @property
    def partition_key(self) -> str:
        """The partition key for the current run.

        Raises an error if the current run is not a partitioned run.
        """
        return self.step_context.partition_key

    @property
    def has_asset_partitions(self) -> bool:
        # Unlike the step_context property, this tolerates a missing step
        # context and simply reports False.
        if self._step_context is not None:
            return self._step_context.has_asset_partitions_for_output(self.name)
        else:
            return False

    @property
    def asset_partition_key(self) -> str:
        """The partition key for output asset.

        Raises an error if the output asset has no partitioning, or if the run covers a partition
        range for the output asset.
        """
        return self.step_context.asset_partition_key_for_output(self.name)

    @property
    def asset_partition_key_range(self) -> PartitionKeyRange:
        """The partition key range for output asset.

        Raises an error if the output asset has no partitioning.
        """
        return self.step_context.asset_partition_key_range_for_output(self.name)

    @property
    def asset_partitions_time_window(self) -> TimeWindow:
        """The time window for the partitions of the output asset.

        Raises an error if either of the following are true:
        - The output asset has no partitioning.
        - The output asset is not partitioned with a TimeWindowPartitionsDefinition.
        """
        return self.step_context.asset_partitions_time_window_for_output(self.name)

    def get_run_scoped_output_identifier(self) -> List[str]:
        """Utility method to get a collection of identifiers that as a whole represent a unique
        step output.

        The unique identifier collection consists of

        - ``run_id``: the id of the run which generates the output.
            Note: This method also handles the re-execution memoization logic. If the step that
            generates the output is skipped in the re-execution, the ``run_id`` will be the id
            of its parent run.
        - ``step_key``: the key for a compute step.
        - ``name``: the name of the output. (default: 'result').

        Returns:
            List[str, ...]: A list of identifiers, i.e. run id, step key, and output name
        """

        warnings.warn(
            "`OutputContext.get_run_scoped_output_identifier` is deprecated. Use "
            "`OutputContext.get_output_identifier` instead."
        )
        # if run_id is None and this is a re-execution, it means we failed to find its source run id
        check.invariant(
            self.run_id is not None,
            "Unable to find the run scoped output identifier: run_id is None on OutputContext.",
        )
        check.invariant(
            self.step_key is not None,
            "Unable to find the run scoped output identifier: step_key is None on OutputContext.",
        )
        check.invariant(
            self.name is not None,
            "Unable to find the run scoped output identifier: name is None on OutputContext.",
        )
        run_id = cast(str, self.run_id)
        step_key = cast(str, self.step_key)
        name = cast(str, self.name)

        if self.mapping_key:
            return [run_id, step_key, name, self.mapping_key]

        return [run_id, step_key, name]

    def get_output_identifier(self) -> List[str]:
        """Utility method to get a collection of identifiers that as a whole represent a unique
        step output.

        If not using memoization, the unique identifier collection consists of

        - ``run_id``: the id of the run which generates the output.
            Note: This method also handles the re-execution memoization logic. If the step that
            generates the output is skipped in the re-execution, the ``run_id`` will be the id
            of its parent run.
        - ``step_key``: the key for a compute step.
        - ``name``: the name of the output. (default: 'result').

        If using memoization, the ``version`` corresponding to the step output is used in place of
        the ``run_id``.

        Returns:
            List[str, ...]: A list of identifiers, i.e. (run_id or version), step_key, and output_name
        """
        version = self.version
        step_key = self.step_key
        name = self.name
        if version is not None:
            # Memoized outputs key on version rather than run id; dynamic
            # (mapped) outputs are not supported under memoization.
            check.invariant(
                self.mapping_key is None,
                f"Mapping key and version both provided for output '{name}' of step '{step_key}'. "
                "Dynamic mapping is not supported when using versioning.",
            )
            identifier = ["versioned_outputs", version, step_key, name]
        else:
            run_id = self.run_id
            identifier = [run_id, step_key, name]
            if self.mapping_key:
                identifier.append(self.mapping_key)

        return identifier

    def log_event(
        self, event: Union[AssetObservation, AssetMaterialization, Materialization]
    ) -> None:
        """Log an AssetMaterialization or AssetObservation from within the body of an io manager's `handle_output` method.

        Events logged with this method will appear in the event log.

        Args:
            event (Union[AssetMaterialization, Materialization, AssetObservation]): The event to log.

        Examples:

        .. code-block:: python

            from dagster import IOManager, AssetMaterialization

            class MyIOManager(IOManager):
                def handle_output(self, context, obj):
                    context.log_event(AssetMaterialization("foo"))
        """
        from dagster.core.events import DagsterEvent

        # Framework-level DagsterEvents are only produced when a step context is
        # available (i.e. a real run); the raw user event is always recorded so
        # get_logged_events works in tests built via build_output_context.
        if isinstance(event, (AssetMaterialization, Materialization)):
            if self._step_context:
                self._events.append(
                    DagsterEvent.asset_materialization(
                        self._step_context,
                        event,
                        self._step_context.get_input_lineage(),
                    )
                )
            self._user_events.append(event)
        elif isinstance(event, AssetObservation):
            if self._step_context:
                self._events.append(DagsterEvent.asset_observation(self._step_context, event))
            self._user_events.append(event)
        else:
            check.failed("Unexpected event {event}".format(event=event))

    def consume_events(self) -> Iterator["DagsterEvent"]:
        """Pops and yields all user-generated events that have been recorded from this context.

        If consume_events has not yet been called, this will yield all logged events since the call to `handle_output`. If consume_events has been called, it will yield all events since the last time consume_events was called. Designed for internal use. Users should never need to invoke this method.
        """

        # Swap out the buffer before yielding so events are consumed at most once.
        events = self._events
        self._events = []
        yield from events

    def get_logged_events(
        self,
    ) -> List[Union[AssetMaterialization, Materialization, AssetObservation]]:
        """Retrieve the list of user-generated events that were logged via the context.

        User-generated events that were yielded will not appear in this list.

        **Examples:**

        .. code-block:: python

            from dagster import IOManager, build_output_context, AssetMaterialization

            class MyIOManager(IOManager):
                def handle_output(self, context, obj):
                    ...

            def test_handle_output():
                mgr = MyIOManager()
                context = build_output_context()
                mgr.handle_output(context)
                all_user_events = context.get_logged_events()
                materializations = [event for event in all_user_events if isinstance(event, AssetMaterialization)]
                ...
        """

        return self._user_events

    def add_output_metadata(self, metadata: Dict[str, Any]) -> None:
        """Add a dictionary of metadata to the handled output.

        Metadata entries added will show up in the HANDLED_OUTPUT and ASSET_MATERIALIZATION events for the run.

        Args:
            metadata (Dict[str, Any]): A metadata dictionary to log

        Examples:

        .. code-block:: python

            from dagster import IOManager

            class MyIOManager(IOManager):
                def handle_output(self, context, obj):
                    context.add_output_metadata({"foo": "bar"})
        """
        from dagster.core.definitions.metadata import normalize_metadata

        # NOTE: this assignment replaces (does not extend) entries recorded by
        # any previous call to add_output_metadata on this context.
        self._metadata_entries = normalize_metadata(metadata, [])

    def get_logged_metadata_entries(
        self,
    ) -> List[Union[MetadataEntry, PartitionMetadataEntry]]:
        """Get the list of metadata entries that have been logged for use with this output."""
        return self._metadata_entries or []

    def consume_logged_metadata_entries(
        self,
    ) -> List[Union[MetadataEntry, PartitionMetadataEntry]]:
        """Pops and yields all user-generated metadata entries that have been recorded from this context.

        If consume_logged_metadata_entries has not yet been called, this will yield all logged events since the call to `handle_output`. If consume_logged_metadata_entries has been called, it will yield all events since the last time consume_logged_metadata_entries was called. Designed for internal use. Users should never need to invoke this method.
        """
        # Swap out the buffer before returning so entries are consumed at most once.
        result = self._metadata_entries
        self._metadata_entries = []
        return result or []
def get_output_context(
    execution_plan: "ExecutionPlan",
    pipeline_def: "PipelineDefinition",
    resolved_run_config: "ResolvedRunConfig",
    step_output_handle: "StepOutputHandle",
    run_id: Optional[str],
    log_manager: Optional["DagsterLogManager"],
    step_context: Optional["StepExecutionContext"],
    resources: Optional["Resources"],
    version: Optional[str],
) -> "OutputContext":
    """Assemble an :py:class:`OutputContext` for a single step output.

    Args:
        run_id (str): The run ID of the run that produced the output, not necessarily the run
            that the context will be used in.
    """
    plan_step = execution_plan.get_step_by_key(step_output_handle.step_key)

    # Look up any output-manager config supplied for this specific output.
    solid_run_config = resolved_run_config.solids[plan_step.solid_handle.to_string()]
    outputs_config = solid_run_config.outputs
    output_config = (
        outputs_config.get_output_manager_config(step_output_handle.output_name)
        if outputs_config
        else None
    )

    step_output = execution_plan.get_step_output(step_output_handle)
    output_def = pipeline_def.get_solid(step_output.solid_handle).output_def_named(step_output.name)

    io_manager_key = output_def.io_manager_key
    resource_config = resolved_run_config.resources[io_manager_key].config

    if step_context:
        # A live step context supersedes any explicitly passed resources.
        check.invariant(
            not resources,
            "Expected either resources or step context to be set, but "
            "received both. If step context is provided, resources for IO manager will be "
            "retrieved off of that.",
        )
        resources = build_resources_for_manager(io_manager_key, step_context)

    return OutputContext(
        step_key=step_output_handle.step_key,
        name=step_output_handle.output_name,
        pipeline_name=pipeline_def.name,
        run_id=run_id,
        metadata=output_def.metadata,
        mapping_key=step_output_handle.mapping_key,
        config=output_config,
        solid_def=pipeline_def.get_solid(plan_step.solid_handle).definition,
        dagster_type=output_def.dagster_type,
        log_manager=log_manager,
        version=version,
        step_context=step_context,
        resource_config=resource_config,
        resources=resources,
    )


def step_output_version(
    pipeline_def: "PipelineDefinition",
    execution_plan: "ExecutionPlan",
    resolved_run_config: "ResolvedRunConfig",
    step_output_handle: "StepOutputHandle",
) -> Optional[str]:
    """Return the memoization version for the given step output, or None if it has none."""
    from dagster.core.execution.resolve_versions import resolve_step_output_versions

    versions_by_handle = resolve_step_output_versions(
        pipeline_def, execution_plan, resolved_run_config
    )
    return versions_by_handle.get(step_output_handle)
def build_output_context(
    step_key: Optional[str] = None,
    name: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    run_id: Optional[str] = None,
    mapping_key: Optional[str] = None,
    config: Optional[Any] = None,
    dagster_type: Optional["DagsterType"] = None,
    version: Optional[str] = None,
    resource_config: Optional[Dict[str, Any]] = None,
    resources: Optional[Dict[str, Any]] = None,
    solid_def: Optional[SolidDefinition] = None,
    op_def: Optional[OpDefinition] = None,
) -> "OutputContext":
    """Builds output context from provided parameters.

    ``build_output_context`` can be used as either a function, or a context manager. If resources
    that are also context managers are provided, then ``build_output_context`` must be used as a
    context manager.

    Args:
        step_key (Optional[str]): The step_key for the compute step that produced the output.
        name (Optional[str]): The name of the output that produced the output.
        metadata (Optional[Dict[str, Any]]): A dict of the metadata that is assigned to the
            OutputDefinition that produced the output.
        run_id (Optional[str]): The id of the run that produced the output. Defaults to an
            ephemeral placeholder id when not provided.
        mapping_key (Optional[str]): The key that identifies a unique mapped output. None for regular outputs.
        config (Optional[Any]): The configuration for the output.
        dagster_type (Optional[DagsterType]): The type of this output.
        version (Optional[str]): (Experimental) The version of the output.
        resource_config (Optional[Dict[str, Any]]): The resource config to make available from the
            input context. This usually corresponds to the config provided to the resource that
            loads the output manager.
        resources (Optional[Resources]): The resources to make available from the context.
            For a given key, you can provide either an actual instance of an object, or a resource
            definition.
        solid_def (Optional[SolidDefinition]): The definition of the solid that produced the output.
        op_def (Optional[OpDefinition]): The definition of the op that produced the output.

    Examples:

        .. code-block:: python

            build_output_context()

            with build_output_context(resources={"foo": context_manager_resource}) as context:
                do_something
    """
    # Imported locally to avoid circular imports at module load time.
    from dagster.core.execution.context_creation_pipeline import initialize_console_manager
    from dagster.core.types.dagster_type import DagsterType

    step_key = check.opt_str_param(step_key, "step_key")
    name = check.opt_str_param(name, "name")
    metadata = check.opt_dict_param(metadata, "metadata", key_type=str)
    # Out-of-run contexts get a placeholder run id so run_id access never raises.
    run_id = check.opt_str_param(run_id, "run_id", default=RUN_ID_PLACEHOLDER)
    mapping_key = check.opt_str_param(mapping_key, "mapping_key")
    dagster_type = check.opt_inst_param(dagster_type, "dagster_type", DagsterType)
    version = check.opt_str_param(version, "version")
    resource_config = check.opt_dict_param(resource_config, "resource_config", key_type=str)
    resources = check.opt_dict_param(resources, "resources", key_type=str)
    solid_def = check.opt_inst_param(solid_def, "solid_def", SolidDefinition)
    op_def = check.opt_inst_param(op_def, "op_def", OpDefinition)

    return OutputContext(
        step_key=step_key,
        name=name,
        pipeline_name=None,
        run_id=run_id,
        metadata=metadata,
        mapping_key=mapping_key,
        config=config,
        solid_def=solid_def,
        dagster_type=dagster_type,
        log_manager=initialize_console_manager(None),
        version=version,
        resource_config=resource_config,
        resources=resources,
        step_context=None,
        op_def=op_def,
    )