import warnings
from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Union, cast
from dagster import check
from dagster.core.definitions.events import (
AssetKey,
AssetMaterialization,
AssetObservation,
Materialization,
MetadataEntry,
PartitionMetadataEntry,
)
from dagster.core.definitions.op_definition import OpDefinition
from dagster.core.definitions.partition_key_range import PartitionKeyRange
from dagster.core.definitions.solid_definition import SolidDefinition
from dagster.core.definitions.time_window_partitions import TimeWindow
from dagster.core.errors import DagsterInvariantViolationError
from dagster.core.execution.plan.utils import build_resources_for_manager
if TYPE_CHECKING:
from dagster.core.definitions import PipelineDefinition
from dagster.core.definitions.resource_definition import Resources
from dagster.core.events import DagsterEvent
from dagster.core.execution.context.system import StepExecutionContext
from dagster.core.execution.plan.outputs import StepOutputHandle
from dagster.core.execution.plan.plan import ExecutionPlan
from dagster.core.log_manager import DagsterLogManager
from dagster.core.system_config.objects import ResolvedRunConfig
from dagster.core.types.dagster_type import DagsterType
# Stand-in run id used by `build_output_context` when the caller does not supply a `run_id`.
RUN_ID_PLACEHOLDER = "__EPHEMERAL_RUN_ID"
class OutputContext:
    """
    The context object that is available to the `handle_output` method of an :py:class:`IOManager`.

    Most attributes are optional at construction time; accessing one of the required ones
    (``step_key``, ``name``, ``pipeline_name``, ``run_id``, ``solid_def``/``op_def``,
    ``dagster_type``, ``log``, ``step_context``) when it was not provided raises
    :py:class:`DagsterInvariantViolationError`.

    Attributes:
        step_key (Optional[str]): The step_key for the compute step that produced the output.
        name (Optional[str]): The name of the output that produced the output.
        pipeline_name (Optional[str]): The name of the pipeline definition.
        run_id (Optional[str]): The id of the run that produced the output.
        metadata (Optional[Dict[str, Any]]): A dict of the metadata that is assigned to the
            OutputDefinition that produced the output.
        mapping_key (Optional[str]): The key that identifies a unique mapped output. None for
            regular outputs.
        config (Optional[Any]): The configuration for the output.
        solid_def (Optional[SolidDefinition]): The definition of the solid that produced the output.
        dagster_type (Optional[DagsterType]): The type of this output.
        log (Optional[DagsterLogManager]): The log manager to use for this output.
        version (Optional[str]): (Experimental) The version of the output.
        resource_config (Optional[Dict[str, Any]]): The config associated with the resource that
            initializes the IO manager handling this output.
        resources (Optional[Resources]): The resources required by the output manager, specified by
            the `required_resource_keys` parameter.
        op_def (Optional[OpDefinition]): The definition of the op that produced the output.
    """

    def __init__(
        self,
        step_key: Optional[str] = None,
        name: Optional[str] = None,
        pipeline_name: Optional[str] = None,
        run_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        mapping_key: Optional[str] = None,
        config: Optional[Any] = None,
        solid_def: Optional["SolidDefinition"] = None,
        dagster_type: Optional["DagsterType"] = None,
        log_manager: Optional["DagsterLogManager"] = None,
        version: Optional[str] = None,
        resource_config: Optional[Dict[str, Any]] = None,
        resources: Optional[Union["Resources", Dict[str, Any]]] = None,
        step_context: Optional["StepExecutionContext"] = None,
        op_def: Optional["OpDefinition"] = None,
    ):
        from dagster.core.definitions.resource_definition import IContainsGenerator, Resources
        from dagster.core.execution.build_resources import build_resources

        # Initialise the state read by __del__ before anything that can raise, so that a
        # constructor failure does not turn into an AttributeError inside the finalizer.
        self._resources_cm = None
        self._resources_contain_cm = False
        self._cm_scope_entered = False

        self._step_key = step_key
        self._name = name
        self._pipeline_name = pipeline_name
        self._run_id = run_id
        self._metadata = metadata
        self._mapping_key = mapping_key
        self._config = config

        check.invariant(
            solid_def is None or op_def is None, "Can't provide both a solid_def and an op_def arg"
        )
        # solid_def and op_def share one slot; both properties read from it.
        self._solid_def = solid_def or op_def

        self._dagster_type = dagster_type
        self._log = log_manager
        self._version = version
        self._resource_config = resource_config
        self._step_context = step_context

        if isinstance(resources, Resources):
            # Already-built resources: there is no context manager for this object to own.
            self._resources = resources
        else:
            # A plain dict (or None) is turned into resources here; the resulting context
            # manager is owned by this object and torn down in __exit__ or __del__.
            self._resources_cm = build_resources(
                check.opt_dict_param(resources, "resources", key_type=str)
            )
            self._resources = self._resources_cm.__enter__()  # pylint: disable=no-member
            self._resources_contain_cm = isinstance(self._resources, IContainsGenerator)

        self._events: List["DagsterEvent"] = []
        self._user_events: List[Union[AssetMaterialization, AssetObservation, Materialization]] = []
        self._metadata_entries: Optional[List[Union[MetadataEntry, PartitionMetadataEntry]]] = None

    def __enter__(self):
        if self._resources_cm:
            self._cm_scope_entered = True
        return self

    def __exit__(self, *exc):
        if self._resources_cm:
            self._resources_cm.__exit__(*exc)  # pylint: disable=no-member

    def __del__(self):
        # Only tear down here when the caller never entered `with`; otherwise __exit__ owns it.
        if self._resources_cm and self._resources_contain_cm and not self._cm_scope_entered:
            self._resources_cm.__exit__(None, None, None)  # pylint: disable=no-member

    @staticmethod
    def _require_provided(value: Any, attr_name: str) -> Any:
        """Return ``value``, raising DagsterInvariantViolationError if it is None."""
        if value is None:
            raise DagsterInvariantViolationError(
                f"Attempting to access {attr_name}, "
                "but it was not provided when constructing the OutputContext"
            )
        return value

    @property
    def step_key(self) -> str:
        return self._require_provided(self._step_key, "step_key")

    @property
    def name(self) -> str:
        return self._require_provided(self._name, "name")

    @property
    def pipeline_name(self) -> str:
        return self._require_provided(self._pipeline_name, "pipeline_name")

    @property
    def run_id(self) -> str:
        return self._require_provided(self._run_id, "run_id")

    @property
    def metadata(self) -> Optional[Dict[str, Any]]:
        return self._metadata

    @property
    def mapping_key(self) -> Optional[str]:
        return self._mapping_key

    @property
    def config(self) -> Any:
        return self._config

    @property
    def solid_def(self) -> "SolidDefinition":
        return self._require_provided(self._solid_def, "solid_def")

    @property
    def op_def(self) -> "OpDefinition":
        return cast(OpDefinition, self._require_provided(self._solid_def, "op_def"))

    @property
    def dagster_type(self) -> "DagsterType":
        return self._require_provided(self._dagster_type, "dagster_type")

    @property
    def log(self) -> "DagsterLogManager":
        return self._require_provided(self._log, "log")

    @property
    def version(self) -> Optional[str]:
        return self._version

    @property
    def resource_config(self) -> Optional[Dict[str, Any]]:
        return self._resource_config

    @property
    def resources(self) -> Any:
        if self._resources is None:
            raise DagsterInvariantViolationError(
                "Attempting to access resources, "
                "but it was not provided when constructing the OutputContext"
            )

        if self._resources_cm and self._resources_contain_cm and not self._cm_scope_entered:
            raise DagsterInvariantViolationError(
                "At least one provided resource is a generator, but attempting to access "
                "resources outside of context manager scope. You can use the following syntax to "
                "open a context manager: `with build_output_context(...) as context:`"
            )
        return self._resources

    @property
    def asset_key(self) -> Optional[AssetKey]:
        """The asset key reported by the output definition whose name matches this output.

        Raises DagsterInvariantViolationError if no solid/op definition or output name was
        provided, and fails an invariant unless exactly one output definition matches.
        """
        # Go through the checked accessor so a missing definition yields the same explicit
        # error as the other properties rather than an AttributeError on None.
        matching_output_defs = [
            output_def
            for output_def in self.solid_def.output_defs
            if output_def.name == self.name
        ]
        check.invariant(len(matching_output_defs) == 1)
        return matching_output_defs[0].get_asset_key(self)

    @property
    def step_context(self) -> "StepExecutionContext":
        return self._require_provided(self._step_context, "step_context")

    @property
    def has_partition_key(self) -> bool:
        """Whether the current run is a partitioned run"""
        return self.step_context.has_partition_key

    @property
    def partition_key(self) -> str:
        """The partition key for the current run.

        Raises an error if the current run is not a partitioned run.
        """
        return self.step_context.partition_key

    @property
    def has_asset_partitions(self) -> bool:
        """Whether the output has asset partitions; False when no step context was provided."""
        if self._step_context is not None:
            return self._step_context.has_asset_partitions_for_output(self.name)
        return False

    @property
    def asset_partition_key(self) -> str:
        """The partition key for output asset.

        Raises an error if the output asset has no partitioning, or if the run covers a partition
        range for the output asset.
        """
        return self.step_context.asset_partition_key_for_output(self.name)

    @property
    def asset_partition_key_range(self) -> PartitionKeyRange:
        """The partition key range for output asset.

        Raises an error if the output asset has no partitioning.
        """
        return self.step_context.asset_partition_key_range_for_output(self.name)

    @property
    def asset_partitions_time_window(self) -> TimeWindow:
        """The time window for the partitions of the output asset.

        Raises an error if either of the following are true:
        - The output asset has no partitioning.
        - The output asset is not partitioned with a TimeWindowPartitionsDefinition.
        """
        return self.step_context.asset_partitions_time_window_for_output(self.name)

    def get_run_scoped_output_identifier(self) -> List[str]:
        """Utility method to get a collection of identifiers that as a whole represent a unique
        step output.

        Deprecated: use :py:meth:`get_output_identifier` instead.

        The unique identifier collection consists of

        - ``run_id``: the id of the run which generates the output.
            Note: This method also handles the re-execution memoization logic. If the step that
            generates the output is skipped in the re-execution, the ``run_id`` will be the id
            of its parent run.
        - ``step_key``: the key for a compute step.
        - ``name``: the name of the output. (default: 'result').

        Returns:
            List[str, ...]: A list of identifiers, i.e. run id, step key, and output name

        Raises:
            DagsterInvariantViolationError: if ``run_id``, ``step_key`` or ``name`` was not
                provided when constructing the context.
        """
        warnings.warn(
            "`OutputContext.get_run_scoped_output_identifier` is deprecated. Use "
            "`OutputContext.get_output_identifier` instead."
        )
        # The property accessors raise if any of these was not provided, so no separate
        # None checks are needed here.
        run_id = self.run_id
        step_key = self.step_key
        name = self.name

        if self.mapping_key:
            return [run_id, step_key, name, self.mapping_key]
        return [run_id, step_key, name]

    def get_output_identifier(self) -> List[str]:
        """Utility method to get a collection of identifiers that as a whole represent a unique
        step output.

        If not using memoization, the unique identifier collection consists of

        - ``run_id``: the id of the run which generates the output.
            Note: This method also handles the re-execution memoization logic. If the step that
            generates the output is skipped in the re-execution, the ``run_id`` will be the id
            of its parent run.
        - ``step_key``: the key for a compute step.
        - ``name``: the name of the output. (default: 'result').

        If using memoization, the ``version`` corresponding to the step output is used in place of
        the ``run_id``.

        Returns:
            List[str, ...]: A list of identifiers, i.e. (run_id or version), step_key, and output_name
        """
        version = self.version
        step_key = self.step_key
        name = self.name

        if version is not None:
            check.invariant(
                self.mapping_key is None,
                f"Mapping key and version both provided for output '{name}' of step '{step_key}'. "
                "Dynamic mapping is not supported when using versioning.",
            )
            identifier = ["versioned_outputs", version, step_key, name]
        else:
            run_id = self.run_id
            identifier = [run_id, step_key, name]
            if self.mapping_key:
                identifier.append(self.mapping_key)

        return identifier

    def log_event(
        self, event: Union[AssetObservation, AssetMaterialization, Materialization]
    ) -> None:
        """Log an AssetMaterialization or AssetObservation from within the body of an io manager's `handle_output` method.

        Events logged with this method will appear in the event log.

        Args:
            event (Union[AssetMaterialization, Materialization, AssetObservation]): The event to log.

        Examples:

        .. code-block:: python

            from dagster import IOManager, AssetMaterialization

            class MyIOManager(IOManager):
                def handle_output(self, context, obj):
                    context.log_event(AssetMaterialization("foo"))
        """
        from dagster.core.events import DagsterEvent

        if isinstance(event, (AssetMaterialization, Materialization)):
            # A DagsterEvent can only be built when a step context is available.
            if self._step_context:
                self._events.append(
                    DagsterEvent.asset_materialization(
                        self._step_context,
                        event,
                        self._step_context.get_input_lineage(),
                    )
                )
            self._user_events.append(event)
        elif isinstance(event, AssetObservation):
            if self._step_context:
                self._events.append(DagsterEvent.asset_observation(self._step_context, event))
            self._user_events.append(event)
        else:
            check.failed(f"Unexpected event {event}")

    def consume_events(self) -> Iterator["DagsterEvent"]:
        """Pops and yields all user-generated events that have been recorded from this context.

        If consume_events has not yet been called, this will yield all logged events since the
        call to `handle_output`. If consume_events has been called, it will yield all events since
        the last time consume_events was called. Designed for internal use. Users should never
        need to invoke this method.
        """
        events = self._events
        self._events = []
        yield from events

    def get_logged_events(
        self,
    ) -> List[Union[AssetMaterialization, Materialization, AssetObservation]]:
        """Retrieve the list of user-generated events that were logged via the context.

        User-generated events that were yielded will not appear in this list.

        **Examples:**

        .. code-block:: python

            from dagster import IOManager, build_output_context, AssetMaterialization

            class MyIOManager(IOManager):
                def handle_output(self, context, obj):
                    ...

            def test_handle_output():
                mgr = MyIOManager()
                context = build_output_context()
                mgr.handle_output(context, obj=...)

                all_user_events = context.get_logged_events()
                materializations = [event for event in all_user_events if isinstance(event, AssetMaterialization)]
                ...
        """
        return self._user_events

    def add_output_metadata(self, metadata: Dict[str, Any]) -> None:
        """Add a dictionary of metadata to the handled output.

        Metadata entries added will show up in the HANDLED_OUTPUT and ASSET_MATERIALIZATION events for the run.

        Entries accumulate across calls: metadata added by an earlier call is kept when this
        method is called again.

        Args:
            metadata (Dict[str, Any]): A metadata dictionary to log

        Examples:

        .. code-block:: python

            from dagster import IOManager

            class MyIOManager(IOManager):
                def handle_output(self, context, obj):
                    context.add_output_metadata({"foo": "bar"})
        """
        from dagster.core.definitions.metadata import normalize_metadata

        # Extend rather than replace, so a second call does not silently drop earlier entries.
        prior_entries = self._metadata_entries or []
        self._metadata_entries = [*prior_entries, *normalize_metadata(metadata, [])]

    def get_logged_metadata_entries(
        self,
    ) -> List[Union[MetadataEntry, PartitionMetadataEntry]]:
        """Get the list of metadata entries that have been logged for use with this output."""
        return self._metadata_entries or []

    def consume_logged_metadata_entries(
        self,
    ) -> List[Union[MetadataEntry, PartitionMetadataEntry]]:
        """Pops and returns all user-generated metadata entries that have been recorded from this context.

        If consume_logged_metadata_entries has not yet been called, this will return all entries
        logged since the call to `handle_output`. If it has been called, it will return all
        entries logged since the last call. Designed for internal use. Users should never need to
        invoke this method.
        """
        result = self._metadata_entries
        self._metadata_entries = []
        return result or []
def get_output_context(
    execution_plan: "ExecutionPlan",
    pipeline_def: "PipelineDefinition",
    resolved_run_config: "ResolvedRunConfig",
    step_output_handle: "StepOutputHandle",
    run_id: Optional[str],
    log_manager: Optional["DagsterLogManager"],
    step_context: Optional["StepExecutionContext"],
    resources: Optional["Resources"],
    version: Optional[str],
) -> "OutputContext":
    """Build the OutputContext for a single step output of an execution plan.

    Args:
        execution_plan (ExecutionPlan): Plan used to look up the step and its output.
        pipeline_def (PipelineDefinition): Pipeline used to look up the solid and its output
            definition.
        resolved_run_config (ResolvedRunConfig): Source of the per-solid output config and of
            the config for the output's IO manager resource.
        step_output_handle (StepOutputHandle): Identifies the step output (step key, output
            name, mapping key).
        run_id (str): The run ID of the run that produced the output, not necessarily the run that
            the context will be used in.
        log_manager (Optional[DagsterLogManager]): Log manager exposed on the context.
        step_context (Optional[StepExecutionContext]): When given, the IO manager's resources
            are built from it and ``resources`` must not also be passed.
        resources (Optional[Resources]): Resources to expose when no step context is given.
        version (Optional[str]): Version of the output, passed through to the context.
    """
    plan_step = execution_plan.get_step_by_key(step_output_handle.step_key)

    # Per-output config lives under the solid's config entry; it may be absent.
    per_output_config = resolved_run_config.solids[plan_step.solid_handle.to_string()].outputs
    output_config = (
        per_output_config.get_output_manager_config(step_output_handle.output_name)
        if per_output_config
        else None
    )

    step_output = execution_plan.get_step_output(step_output_handle)
    output_def = pipeline_def.get_solid(step_output.solid_handle).output_def_named(step_output.name)

    manager_key = output_def.io_manager_key
    manager_resource_config = resolved_run_config.resources[manager_key].config

    if step_context:
        # Resources come from exactly one place: the step context or the explicit argument.
        check.invariant(
            not resources,
            "Expected either resources or step context to be set, but "
            "received both. If step context is provided, resources for IO manager will be "
            "retrieved off of that.",
        )
        resources = build_resources_for_manager(manager_key, step_context)

    return OutputContext(
        step_key=step_output_handle.step_key,
        name=step_output_handle.output_name,
        pipeline_name=pipeline_def.name,
        run_id=run_id,
        metadata=output_def.metadata,
        mapping_key=step_output_handle.mapping_key,
        config=output_config,
        solid_def=pipeline_def.get_solid(plan_step.solid_handle).definition,
        dagster_type=output_def.dagster_type,
        log_manager=log_manager,
        version=version,
        step_context=step_context,
        resource_config=manager_resource_config,
        resources=resources,
    )
def step_output_version(
    pipeline_def: "PipelineDefinition",
    execution_plan: "ExecutionPlan",
    resolved_run_config: "ResolvedRunConfig",
    step_output_handle: "StepOutputHandle",
) -> Optional[str]:
    """Look up the resolved version of one step output.

    Resolves versions for the given pipeline, plan and run config, then returns the entry for
    ``step_output_handle``, or None when the resolved mapping has no entry for that handle.
    """
    from dagster.core.execution.resolve_versions import resolve_step_output_versions

    versions_by_handle = resolve_step_output_versions(
        pipeline_def, execution_plan, resolved_run_config
    )
    if step_output_handle not in versions_by_handle:
        return None
    return versions_by_handle[step_output_handle]
def build_output_context(
    step_key: Optional[str] = None,
    name: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    run_id: Optional[str] = None,
    mapping_key: Optional[str] = None,
    config: Optional[Any] = None,
    dagster_type: Optional["DagsterType"] = None,
    version: Optional[str] = None,
    resource_config: Optional[Dict[str, Any]] = None,
    resources: Optional[Dict[str, Any]] = None,
    solid_def: Optional[SolidDefinition] = None,
    op_def: Optional[OpDefinition] = None,
) -> "OutputContext":
    """Builds output context from provided parameters.

    ``build_output_context`` can be used as either a function, or a context manager. If resources
    that are also context managers are provided, then ``build_output_context`` must be used as a
    context manager.

    The returned context has no pipeline name and no step context, and logs through a console
    log manager.

    Args:
        step_key (Optional[str]): The step_key for the compute step that produced the output.
        name (Optional[str]): The name of the output that produced the output.
        metadata (Optional[Dict[str, Any]]): A dict of the metadata that is assigned to the
            OutputDefinition that produced the output.
        run_id (Optional[str]): The id of the run that produced the output. A placeholder id is
            used when omitted.
        mapping_key (Optional[str]): The key that identifies a unique mapped output. None for
            regular outputs.
        config (Optional[Any]): The configuration for the output.
        dagster_type (Optional[DagsterType]): The type of this output.
        version (Optional[str]): (Experimental) The version of the output.
        resource_config (Optional[Dict[str, Any]]): The resource config to make available from the
            output context. This usually corresponds to the config provided to the resource that
            loads the output manager.
        resources (Optional[Resources]): The resources to make available from the context.
            For a given key, you can provide either an actual instance of an object, or a resource
            definition.
        solid_def (Optional[SolidDefinition]): The definition of the solid that produced the output.
        op_def (Optional[OpDefinition]): The definition of the op that produced the output.

    Examples:

        .. code-block:: python

            build_output_context()

            with build_output_context(resources={"foo": context_manager_resource}) as context:
                do_something

    """
    from dagster.core.execution.context_creation_pipeline import initialize_console_manager
    from dagster.core.types.dagster_type import DagsterType

    # Keyword arguments are evaluated left to right, so the validated ones are listed in the
    # order their checks must run; the console log manager is created only after all pass.
    return OutputContext(
        step_key=check.opt_str_param(step_key, "step_key"),
        name=check.opt_str_param(name, "name"),
        metadata=check.opt_dict_param(metadata, "metadata", key_type=str),
        run_id=check.opt_str_param(run_id, "run_id", default=RUN_ID_PLACEHOLDER),
        mapping_key=check.opt_str_param(mapping_key, "mapping_key"),
        dagster_type=check.opt_inst_param(dagster_type, "dagster_type", DagsterType),
        version=check.opt_str_param(version, "version"),
        resource_config=check.opt_dict_param(resource_config, "resource_config", key_type=str),
        resources=check.opt_dict_param(resources, "resources", key_type=str),
        solid_def=check.opt_inst_param(solid_def, "solid_def", SolidDefinition),
        op_def=check.opt_inst_param(op_def, "op_def", OpDefinition),
        config=config,
        pipeline_name=None,
        step_context=None,
        log_manager=initialize_console_manager(None),
    )