From 6f5c5dfa9c2c6454c781bc70b9b0149e624fb0eb Mon Sep 17 00:00:00 2001 From: luen Date: Wed, 25 Feb 2026 13:58:56 +1000 Subject: [PATCH 1/2] Remove 'secure' attribute from cookies in ProxyGenerator to ensure compatibility with httpx Cookies.set() --- scholarly/_proxy_generator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scholarly/_proxy_generator.py b/scholarly/_proxy_generator.py index 49d5bd5..0297d89 100644 --- a/scholarly/_proxy_generator.py +++ b/scholarly/_proxy_generator.py @@ -446,6 +446,7 @@ def _handle_captcha2(self, url): cookie.pop("httpOnly", None) cookie.pop("expiry", None) cookie.pop("sameSite", None) + cookie.pop("secure", None) # httpx Cookies.set() does not accept 'secure' self._session.cookies.set(**cookie) return self._session From fb5834074844a92b7bc8cf7e4b75a89f4a8332e0 Mon Sep 17 00:00:00 2001 From: luen Date: Wed, 25 Feb 2026 15:39:12 +1000 Subject: [PATCH 2/2] Add exception handling for proxy failures in tests and update regex for citedby URL extraction --- scholarly/_proxy_generator.py | 5 ++++- scholarly/_scholarly.py | 2 +- test_module.py | 29 ++++++++++++++++++++++------- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/scholarly/_proxy_generator.py b/scholarly/_proxy_generator.py index 0297d89..ad84d99 100644 --- a/scholarly/_proxy_generator.py +++ b/scholarly/_proxy_generator.py @@ -477,8 +477,11 @@ def _new_session(self, **kwargs): init_kwargs.update(headers=_HEADERS) if self._proxy_works: - init_kwargs["proxies"] = proxies #.get("http", None) self._proxies = proxies + # httpx uses proxy= (single URL), not proxies= (dict) + proxy_url = proxies.get("https://") or proxies.get("http://") + if proxy_url: + init_kwargs["proxy"] = proxy_url if self.proxy_mode is ProxyMode.SCRAPERAPI: # SSL Certificate verification must be disabled for # ScraperAPI requests to work. diff --git a/scholarly/_scholarly.py b/scholarly/_scholarly.py index f0162dc..cd4edc3 100644 --- a/scholarly/_scholarly.py +++ b/scholarly/_scholarly.py @@ -309,7 +309,7 @@ def citedby(self, object: Publication)->_SearchScholarIterator: def _citedby_long(self, object: Publication, years): # Extract cites_id. Note: There could be multiple ones, separated by commas. - m = re.search("cites=[\d+,]*", object["citedby_url"]) + m = re.search(r"cites=[\d+,]*", object["citedby_url"]) pub_id = m.group()[6:] for y_hi, y_lo in years: sub_citations = self.search_citedby(publication_id=pub_id, year_low=y_lo, year_high=y_hi) diff --git a/test_module.py b/test_module.py index 0effc0f..eef7149 100644 --- a/test_module.py +++ b/test_module.py @@ -2,7 +2,7 @@ import os import sys from collections import Counter -from scholarly import scholarly, ProxyGenerator +from scholarly import scholarly, ProxyGenerator, MaxTriesExceededException from scholarly.data_types import Mandate from scholarly.publication_parser import PublicationParser import random @@ -15,6 +15,12 @@ import pandas as pd except ImportError: pd = None +try: + from fp.errors import FreeProxyException +except ImportError: + class FreeProxyException(Exception): + """Placeholder when free-proxy package is not installed.""" + pass class TestLuminati(unittest.TestCase): @@ -86,9 +92,12 @@ def setUpClass(cls): scholarly.set_timeout(5) scholarly.set_retries(5) - pg = ProxyGenerator() - pg.FreeProxies() - scholarly.use_proxy(pg, ProxyGenerator()) + try: + pg = ProxyGenerator() + pg.FreeProxies() + scholarly.use_proxy(pg, ProxyGenerator()) + except (FreeProxyException, MaxTriesExceededException) as e: + raise unittest.SkipTest(f"No working free proxy available: {e}") from e # Try storing the file temporarily as `scholarly.csv` and delete it. # If there exists already a file with that name, generate a random name @@ -596,12 +605,18 @@ def setUpClass(cls): cls.connection_method = os.getenv("CONNECTION_METHOD") else: cls.connection_method = "none" - scholarly.use_proxy(None) + try: + scholarly.use_proxy(None) + except (FreeProxyException, MaxTriesExceededException) as e: + raise unittest.SkipTest(f"No working free proxy available: {e}") from e return # Use dual proxies for unit testing - secondary_proxy_generator = ProxyGenerator() - secondary_proxy_generator.FreeProxies() + try: + secondary_proxy_generator = ProxyGenerator() + secondary_proxy_generator.FreeProxies() + except (FreeProxyException, MaxTriesExceededException) as e: + raise unittest.SkipTest(f"No working free proxy available: {e}") from e proxy_generator = ProxyGenerator() if cls.connection_method == "tor":