Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docs/source/user-guide/dataframe/rendering.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ You can customize how DataFrames are rendered by configuring the formatter:
max_width=1000, # Maximum width in pixels
max_height=300, # Maximum height in pixels
max_memory_bytes=2097152, # Maximum memory for rendering (2MB)
min_rows_display=20, # Minimum number of rows to display
repr_rows=10, # Number of rows to display in __repr__
min_rows=10, # Minimum number of rows to display
max_rows=10, # Maximum rows to display in __repr__
enable_cell_expansion=True,# Allow expanding truncated cells
custom_css=None, # Additional custom CSS
show_truncation_message=True, # Show message when data is truncated
Expand Down Expand Up @@ -190,8 +190,8 @@ You can control how much data is displayed and how much memory is used for rende

configure_formatter(
max_memory_bytes=4 * 1024 * 1024, # 4MB maximum memory for display
min_rows_display=50, # Always show at least 50 rows
repr_rows=20 # Show 20 rows in __repr__ output
min_rows=20, # Always show at least 20 rows
max_rows=50 # Show up to 50 rows in output
)

These parameters help balance comprehensive data display against performance considerations.
Expand Down
235 changes: 187 additions & 48 deletions python/datafusion/dataframe_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

from __future__ import annotations

import warnings
from typing import (
TYPE_CHECKING,
Any,
Expand Down Expand Up @@ -61,6 +62,93 @@ def _validate_bool(value: Any, param_name: str) -> None:
raise TypeError(msg)


def _validate_formatter_parameters(
    max_cell_length: int,
    max_width: int,
    max_height: int,
    max_memory_bytes: int,
    min_rows: int,
    max_rows: int | None,
    repr_rows: int | None,
    enable_cell_expansion: bool,
    show_truncation_message: bool,
    use_shared_styles: bool,
    custom_css: str | None,
    style_provider: Any,
) -> int:
    """Check every formatter option and resolve the effective row limit.

    Checks run in a fixed order (size limits, row limits, boolean flags,
    ``custom_css``, ``style_provider``), so the first offending argument is
    the one that gets reported.

    Args:
        max_cell_length: Cell length limit; checked as a positive integer.
        max_width: Table width limit; checked as a positive integer.
        max_height: Table height limit; checked as a positive integer.
        max_memory_bytes: Memory budget; checked as a positive integer.
        min_rows: Lower bound on displayed rows; checked as a positive
            integer and required to be no greater than the resolved
            ``max_rows``.
        max_rows: Upper bound on displayed rows. ``None`` means "not
            supplied"; the value 10 is used when ``repr_rows`` is also
            ``None``.
        repr_rows: Deprecated alias for ``max_rows``. When given, it becomes
            the resolved row limit.
        enable_cell_expansion: Checked with ``_validate_bool``.
        show_truncation_message: Checked with ``_validate_bool``.
        use_shared_styles: Checked with ``_validate_bool``.
        custom_css: Must be ``None`` or a ``str``.
        style_provider: Must be ``None`` or an instance of ``StyleProvider``.

    Returns:
        The row limit to use: ``repr_rows`` if it was given, otherwise
        ``max_rows``, otherwise 10.

    Raises:
        ValueError: If a numeric argument is rejected by
            ``_validate_positive_int``, if ``repr_rows`` and ``max_rows`` are
            both given with different values, or if ``min_rows`` exceeds the
            resolved row limit.
        TypeError: If ``custom_css`` or ``style_provider`` has the wrong type,
            or a boolean flag is rejected by ``_validate_bool``.

    Warns:
        DeprecationWarning: Emitted (not raised) whenever ``repr_rows`` is
            not ``None``.
    """
    # Size limits, in declaration order.
    for label, number in (
        ("max_cell_length", max_cell_length),
        ("max_width", max_width),
        ("max_height", max_height),
        ("max_memory_bytes", max_memory_bytes),
        ("min_rows", min_rows),
    ):
        _validate_positive_int(number, label)

    row_limit = max_rows
    if repr_rows is not None:
        # The warning is issued before repr_rows itself is validated.
        warnings.warn(
            "repr_rows parameter is deprecated, use max_rows instead",
            DeprecationWarning,
            stacklevel=4,
        )
        _validate_positive_int(repr_rows, "repr_rows")
        # Passing both names is tolerated only when they agree.
        if row_limit is not None and repr_rows != row_limit:
            msg = "Cannot specify both repr_rows and max_rows; use max_rows only"
            raise ValueError(msg)
        row_limit = repr_rows
    elif row_limit is None:
        # Neither name was supplied: fall back to the default.
        row_limit = 10

    _validate_positive_int(row_limit, "max_rows")

    # The floor may not sit above the ceiling.
    if not min_rows <= row_limit:
        msg = "min_rows must be less than or equal to max_rows"
        raise ValueError(msg)

    # Boolean flags, in the same order as before.
    for label, flag in (
        ("enable_cell_expansion", enable_cell_expansion),
        ("show_truncation_message", show_truncation_message),
        ("use_shared_styles", use_shared_styles),
    ):
        _validate_bool(flag, label)

    if not (custom_css is None or isinstance(custom_css, str)):
        msg = "custom_css must be None or a string"
        raise TypeError(msg)

    if not (style_provider is None or isinstance(style_provider, StyleProvider)):
        msg = "style_provider must implement the StyleProvider protocol"
        raise TypeError(msg)

    return row_limit


@runtime_checkable
class CellFormatter(Protocol):
"""Protocol for cell value formatters."""
Expand Down Expand Up @@ -126,8 +214,9 @@ class DataFrameHtmlFormatter:
max_width: Maximum width of the HTML table in pixels
max_height: Maximum height of the HTML table in pixels
max_memory_bytes: Maximum memory in bytes for rendered data (default: 2MB)
min_rows_display: Minimum number of rows to display
repr_rows: Default number of rows to display in repr output
min_rows: Minimum number of rows to display (must be <= max_rows)
max_rows: Maximum number of rows to display in repr output
repr_rows: Deprecated alias for max_rows
enable_cell_expansion: Whether to add expand/collapse buttons for long cell
values
custom_css: Additional CSS to include in the HTML output
Expand All @@ -143,8 +232,9 @@ def __init__(
max_width: int = 1000,
max_height: int = 300,
max_memory_bytes: int = 2 * 1024 * 1024, # 2 MB
min_rows_display: int = 20,
repr_rows: int = 10,
min_rows: int = 10,
max_rows: int | None = None,
repr_rows: int | None = None,
enable_cell_expansion: bool = True,
custom_css: str | None = None,
show_truncation_message: bool = True,
Expand All @@ -155,71 +245,70 @@ def __init__(

Parameters
----------
max_cell_length : int, default 25
max_cell_length
Maximum length of cell content before truncation.
max_width : int, default 1000
max_width
Maximum width of the displayed table in pixels.
max_height : int, default 300
max_height
Maximum height of the displayed table in pixels.
max_memory_bytes : int, default 2097152 (2MB)
Maximum memory in bytes for rendered data.
min_rows_display : int, default 20
Minimum number of rows to display.
repr_rows : int, default 10
Default number of rows to display in repr output.
enable_cell_expansion : bool, default True
max_memory_bytes
Maximum memory in bytes for rendered data. Helps prevent performance
issues with large datasets.
min_rows
Minimum number of rows to display even if memory limit is reached.
Must not exceed ``max_rows``.
max_rows
Maximum number of rows to display. Takes precedence over memory limits
when fewer rows are requested.
repr_rows
Deprecated alias for ``max_rows``. Use ``max_rows`` instead.
enable_cell_expansion
Whether to allow cells to expand when clicked.
custom_css : str, optional
custom_css
Custom CSS to apply to the HTML table.
show_truncation_message : bool, default True
show_truncation_message
Whether to show a message indicating that content has been truncated.
style_provider : StyleProvider, optional
style_provider
Provider of CSS styles for the HTML table. If None, DefaultStyleProvider
is used.
use_shared_styles : bool, default True
Whether to use shared styles across multiple tables.
use_shared_styles
Whether to use shared styles across multiple tables. This improves
performance when displaying many DataFrames in a single notebook.

Raises:
------
ValueError
If max_cell_length, max_width, max_height, max_memory_bytes,
min_rows_display, or repr_rows is not a positive integer.
min_rows or max_rows is not a positive integer, or if min_rows
exceeds max_rows.
TypeError
If enable_cell_expansion, show_truncation_message, or use_shared_styles is
not a boolean,
or if custom_css is provided but is not a string,
or if style_provider is provided but does not implement the StyleProvider
not a boolean, or if custom_css is provided but is not a string, or if
style_provider is provided but does not implement the StyleProvider
protocol.
"""
# Validate numeric parameters
_validate_positive_int(max_cell_length, "max_cell_length")
_validate_positive_int(max_width, "max_width")
_validate_positive_int(max_height, "max_height")
_validate_positive_int(max_memory_bytes, "max_memory_bytes")
_validate_positive_int(min_rows_display, "min_rows_display")
_validate_positive_int(repr_rows, "repr_rows")

# Validate boolean parameters
_validate_bool(enable_cell_expansion, "enable_cell_expansion")
_validate_bool(show_truncation_message, "show_truncation_message")
_validate_bool(use_shared_styles, "use_shared_styles")

# Validate custom_css
if custom_css is not None and not isinstance(custom_css, str):
msg = "custom_css must be None or a string"
raise TypeError(msg)

# Validate style_provider
if style_provider is not None and not isinstance(style_provider, StyleProvider):
msg = "style_provider must implement the StyleProvider protocol"
raise TypeError(msg)
# Validate all parameters and get resolved max_rows
resolved_max_rows = _validate_formatter_parameters(
max_cell_length,
max_width,
max_height,
max_memory_bytes,
min_rows,
max_rows,
repr_rows,
enable_cell_expansion,
show_truncation_message,
use_shared_styles,
custom_css,
style_provider,
)

self.max_cell_length = max_cell_length
self.max_width = max_width
self.max_height = max_height
self.max_memory_bytes = max_memory_bytes
self.min_rows_display = min_rows_display
self.repr_rows = repr_rows
self.min_rows = min_rows
self._max_rows = resolved_max_rows
self.enable_cell_expansion = enable_cell_expansion
self.custom_css = custom_css
self.show_truncation_message = show_truncation_message
Expand All @@ -231,6 +320,55 @@ def __init__(
self._custom_cell_builder: Callable[[Any, int, int, str], str] | None = None
self._custom_header_builder: Callable[[Any], str] | None = None

@property
def max_rows(self) -> int:
    """Get the maximum number of rows to display.

    Returns:
        The maximum number of rows to display in repr output
    """
    return self._max_rows

@max_rows.setter
def max_rows(self, value: int) -> None:
    """Set the maximum number of rows to display.

    The value goes through the same positive-integer check that the
    constructor applies, so an assignment such as ``formatter.max_rows = 0``
    is rejected instead of being stored silently.

    The ``min_rows <= max_rows`` relationship is deliberately not re-checked
    here: both limits are set independently, and enforcing the ordering on
    each assignment would make it impossible to move both limits in two
    separate steps.

    Args:
        value: The maximum number of rows

    Raises:
        ValueError: If ``value`` is rejected by ``_validate_positive_int``.
    """
    _validate_positive_int(value, "max_rows")
    self._max_rows = value

@property
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If repr_rows is being deprecated, why add an accessor?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added the accessors for backward compatibility during the deprecation period:

Rationale:

  1. User code may directly access the property: code like formatter.repr_rows = 20 continues to work during the deprecation period
  2. Graceful migration path: Users get a warning but their code doesn't break
  3. Custom formatter implementations: External code that inherits from the formatter and accesses repr_rows directly will continue to work

Shall we keep the accessors for now with the deprecation warnings, and plan their removal in the next major version?

def repr_rows(self) -> int:
    """Get the maximum number of rows (deprecated name).

    Reads the same ``_max_rows`` value that :attr:`max_rows` returns.
    Reading through this name does not emit a warning.

    .. deprecated::
        Use :attr:`max_rows` instead. This property is provided for
        backward compatibility.

    Returns:
        The maximum number of rows to display
    """
    return self._max_rows

@repr_rows.setter
def repr_rows(self, value: int) -> None:
"""Set the maximum number of rows using deprecated name.

Comment on lines +354 to +357
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same, why add for deprecated?

.. deprecated::
Use :attr:`max_rows` setter instead. This property is provided for
backward compatibility.

Args:
value: The maximum number of rows
"""
warnings.warn(
"repr_rows is deprecated, use max_rows instead",
DeprecationWarning,
stacklevel=2,
)
self._max_rows = value

def register_formatter(self, type_class: type, formatter: CellFormatter) -> None:
"""Register a custom formatter for a specific data type.

Expand Down Expand Up @@ -659,7 +797,8 @@ def configure_formatter(**kwargs: Any) -> None:
"max_width",
"max_height",
"max_memory_bytes",
"min_rows_display",
"min_rows",
"max_rows",
"repr_rows",
"enable_cell_expansion",
"custom_css",
Expand Down
Loading