scholarly-python-package · Luen · Feb 25, 2026 · Feb 25, 2026
diff --git a/scholarly/_proxy_generator.py b/scholarly/_proxy_generator.py
@@ -446,6 +446,7 @@ def _handle_captcha2(self, url):
             cookie.pop("httpOnly", None)
             cookie.pop("expiry", None)
             cookie.pop("sameSite", None)
+            cookie.pop("secure", None)  # httpx Cookies.set() does not accept 'secure'
             self._session.cookies.set(**cookie)
 
         return self._session
@@ -476,8 +477,11 @@ def _new_session(self, **kwargs):
         init_kwargs.update(headers=_HEADERS)
 
         if self._proxy_works:
-            init_kwargs["proxies"] = proxies #.get("http", None)
             self._proxies = proxies
+            # httpx 0.28 removed proxies= (dict); pass a single proxy URL via proxy= instead
+            proxy_url = proxies.get("https://") or proxies.get("http://")
+            if proxy_url:
+                init_kwargs["proxy"] = proxy_url
             if self.proxy_mode is ProxyMode.SCRAPERAPI:
                 # SSL Certificate verification must be disabled for
                 # ScraperAPI requests to work.

diff --git a/scholarly/_scholarly.py b/scholarly/_scholarly.py
@@ -309,7 +309,7 @@ def citedby(self, object: Publication)->_SearchScholarIterator:
 
     def _citedby_long(self, object: Publication, years):
         # Extract cites_id. Note: There could be multiple ones, separated by commas.
-        m = re.search("cites=[\d+,]*", object["citedby_url"])
+        m = re.search(r"cites=[\d+,]*", object["citedby_url"])
         pub_id = m.group()[6:]
         for y_hi, y_lo in years:
             sub_citations = self.search_citedby(publication_id=pub_id, year_low=y_lo, year_high=y_hi)

diff --git a/test_module.py b/test_module.py
@@ -2,7 +2,7 @@
 import os
 import sys
 from collections import Counter
-from scholarly import scholarly, ProxyGenerator
+from scholarly import scholarly, ProxyGenerator, MaxTriesExceededException
 from scholarly.data_types import Mandate
 from scholarly.publication_parser import PublicationParser
 import random
@@ -15,6 +15,12 @@
     import pandas as pd
 except ImportError:
     pd = None
+try:
+    from fp.errors import FreeProxyException
+except ImportError:
+    class FreeProxyException(Exception):
+        """Stand-in so except clauses still resolve when free-proxy is not installed."""
+        pass
 
 
 class TestLuminati(unittest.TestCase):
@@ -86,9 +92,12 @@ def setUpClass(cls):
         scholarly.set_timeout(5)
         scholarly.set_retries(5)
 
-        pg = ProxyGenerator()
-        pg.FreeProxies()
-        scholarly.use_proxy(pg, ProxyGenerator())
+        try:
+            pg = ProxyGenerator()
+            pg.FreeProxies()
+            scholarly.use_proxy(pg, ProxyGenerator())
+        except (FreeProxyException, MaxTriesExceededException) as e:
+            raise unittest.SkipTest(f"No working free proxy available: {e}") from e
 
         # Try storing the file temporarily as `scholarly.csv` and delete it.
         # If there exists already a file with that name, generate a random name
@@ -596,12 +605,18 @@ def setUpClass(cls):
             cls.connection_method = os.getenv("CONNECTION_METHOD")
         else:
             cls.connection_method = "none"
-            scholarly.use_proxy(None)
+            try:
+                scholarly.use_proxy(None)
+            except (FreeProxyException, MaxTriesExceededException) as e:
+                raise unittest.SkipTest(f"No working free proxy available: {e}") from e
             return
 
         # Use dual proxies for unit testing
-        secondary_proxy_generator = ProxyGenerator()
-        secondary_proxy_generator.FreeProxies()
+        try:
+            secondary_proxy_generator = ProxyGenerator()
+            secondary_proxy_generator.FreeProxies()
+        except (FreeProxyException, MaxTriesExceededException) as e:
+            raise unittest.SkipTest(f"No working free proxy available: {e}") from e
 
         proxy_generator = ProxyGenerator()
         if cls.connection_method == "tor":