Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/crawlee/otel/crawler_instrumentor.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,10 @@ def _init_wrapper(wrapped: Any, _: Any, args: Any, kwargs: Any) -> None:

async def middleware_wrapper(wrapped: Any, instance: _Middleware, args: Any, kwargs: Any) -> Any:
with self._tracer.start_as_current_span(
name=f'{instance.generator.__name__}, {wrapped.__name__}', # type:ignore[attr-defined] # valid in our context
name=f'{instance.generator.__name__}, {wrapped.__name__}', # ty:ignore[unresolved-attribute] # valid in our context
attributes={
URL_FULL: instance.input_context.request.url,
CODE_FUNCTION_NAME: instance.generator.__qualname__, # type:ignore[attr-defined] # valid in our context
CODE_FUNCTION_NAME: instance.generator.__qualname__, # ty:ignore[unresolved-attribute] # valid in our context
},
):
return await wrapped(*args, **kwargs)
Expand Down
3 changes: 1 addition & 2 deletions src/crawlee/storage_clients/_base/_dataset_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ async def iterate_items(
The backend method for the `Dataset.iterate_items` call.
"""
# This syntax makes the type checker work properly with an abstract AsyncIterator.
# https://mypy.readthedocs.io/en/stable/more_types.html#asynchronous-iterators
raise NotImplementedError
if False:
yield 0
yield {}
7 changes: 4 additions & 3 deletions src/crawlee/storage_clients/_base/_key_value_store_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any

from crawlee.storage_clients.models import KeyValueStoreRecordMetadata

if TYPE_CHECKING:
from collections.abc import AsyncIterator

from crawlee.storage_clients.models import KeyValueStoreMetadata, KeyValueStoreRecord, KeyValueStoreRecordMetadata
from crawlee.storage_clients.models import KeyValueStoreMetadata, KeyValueStoreRecord


class KeyValueStoreClient(ABC):
Expand Down Expand Up @@ -73,10 +75,9 @@ async def iterate_keys(
The backend method for the `KeyValueStore.iterate_keys` call.
"""
# This syntax makes the type checker work properly with an abstract AsyncIterator.
# https://mypy.readthedocs.io/en/stable/more_types.html#asynchronous-iterators
raise NotImplementedError
if False:
yield 0
yield KeyValueStoreRecordMetadata()

@abstractmethod
async def get_public_url(self, *, key: str) -> str:
Expand Down
2 changes: 1 addition & 1 deletion src/crawlee/storages/_storage_instance_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ async def open_storage_instance(

metadata = await client.get_metadata()

instance = cls(client, metadata.id, metadata.name) # type: ignore[call-arg]
instance = cls(client, metadata.id, metadata.name) # ty: ignore[too-many-positional-arguments]
instance_name = getattr(instance, 'name', None)

# Cache the instance.
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ def _prepare_test_env() -> None:

# Reset global class variables to ensure test isolation.
KeyValueStore._autosaved_values = {}
Statistics._Statistics__next_id = 0 # type:ignore[attr-defined] # Mangled attribute
BasicCrawler._BasicCrawler__next_id = 0 # type:ignore[attr-defined] # Mangled attribute
Statistics._Statistics__next_id = 0 # ty:ignore[unresolved-attribute] # Mangled attribute
BasicCrawler._BasicCrawler__next_id = 0 # ty:ignore[unresolved-attribute] # Mangled attribute

return _prepare_test_env

Expand Down
6 changes: 3 additions & 3 deletions tests/unit/crawlers/_basic/test_basic_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1168,7 +1168,7 @@ async def test_crawler_multiple_stops_in_parallel() -> None:
# Set concurrency to 2 to ensure two urls are being visited in parallel.
crawler = BasicCrawler(concurrency_settings=ConcurrencySettings(desired_concurrency=2, max_concurrency=2))

both_handlers_started = asyncio.Barrier(2) # type:ignore[attr-defined] # Test is skipped in older Python versions.
both_handlers_started = asyncio.Barrier(2) # ty:ignore[unresolved-attribute] # Test is skipped in older Python versions.
only_one_handler_at_a_time = asyncio.Semaphore(1)

@crawler.router.default_handler
Expand Down Expand Up @@ -1352,7 +1352,7 @@ async def test_context_use_state_race_condition_in_handlers(key_value_store: Key
Result should be incremented by 2.
Method `use_state` must be implemented in a way that prevents race conditions in such a scenario."""
# Test is skipped in older Python versions.
from asyncio import Barrier # type:ignore[attr-defined] # noqa: PLC0415
from asyncio import Barrier # ty:ignore[unresolved-import] # noqa: PLC0415

crawler = BasicCrawler()
store = await crawler.get_key_value_store()
Expand Down Expand Up @@ -1393,7 +1393,7 @@ async def test_timeout_in_handler(sleep_type: str) -> None:
Crawler should attempt to retry it.
This test creates a situation where the request handler times out twice; on the third retry it does not time out."""
# Test is skipped in older Python versions.
from asyncio import timeout # type:ignore[attr-defined] # noqa: PLC0415
from asyncio import timeout # ty:ignore[unresolved-import] # noqa: PLC0415

non_realtime_system_coefficient = 10
handler_timeout = timedelta(seconds=1)
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/storages/test_storage_instance_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ async def test_preexisting_unnamed_storage_open_by_id(storage_type: type[Storage
@pytest.mark.skipif(sys.version_info[:3] < (3, 11), reason='asyncio.Barrier was introduced in Python 3.11.')
async def test_concurrent_open_datasets() -> None:
"""Test that concurrent open datasets with the same name return the same instance."""
from asyncio import Barrier # type:ignore[attr-defined] # noqa: PLC0415
from asyncio import Barrier # ty:ignore[unresolved-import] # noqa: PLC0415

barrier = Barrier(2)

Expand All @@ -161,7 +161,7 @@ async def push_data(data: dict) -> None:
@pytest.mark.skipif(sys.version_info[:3] < (3, 11), reason='asyncio.Barrier was introduced in Python 3.11.')
async def test_concurrent_open_datasets_with_same_name_and_alias() -> None:
"""Test that concurrent open requests for the same storage return the same instance."""
from asyncio import Barrier # type:ignore[attr-defined] # noqa: PLC0415
from asyncio import Barrier # ty:ignore[unresolved-import] # noqa: PLC0415

valid_kwargs: dict[str, str | None] = {}

Expand Down
40 changes: 20 additions & 20 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading