Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion scholarly/_proxy_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,7 @@ def _handle_captcha2(self, url):
cookie.pop("httpOnly", None)
cookie.pop("expiry", None)
cookie.pop("sameSite", None)
cookie.pop("secure", None) # httpx Cookies.set() does not accept 'secure'
self._session.cookies.set(**cookie)

return self._session
Expand Down Expand Up @@ -476,8 +477,11 @@ def _new_session(self, **kwargs):
init_kwargs.update(headers=_HEADERS)

if self._proxy_works:
init_kwargs["proxies"] = proxies #.get("http", None)
self._proxies = proxies
# httpx uses proxy= (single URL), not proxies= (dict)
proxy_url = proxies.get("https://") or proxies.get("http://")
if proxy_url:
init_kwargs["proxy"] = proxy_url
if self.proxy_mode is ProxyMode.SCRAPERAPI:
# SSL Certificate verification must be disabled for
# ScraperAPI requests to work.
Expand Down
2 changes: 1 addition & 1 deletion scholarly/_scholarly.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ def citedby(self, object: Publication)->_SearchScholarIterator:

def _citedby_long(self, object: Publication, years):
# Extract cites_id. Note: There could be multiple ones, separated by commas.
m = re.search("cites=[\d+,]*", object["citedby_url"])
m = re.search(r"cites=[\d+,]*", object["citedby_url"])
pub_id = m.group()[6:]
for y_hi, y_lo in years:
sub_citations = self.search_citedby(publication_id=pub_id, year_low=y_lo, year_high=y_hi)
Expand Down
29 changes: 22 additions & 7 deletions test_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
import sys
from collections import Counter
from scholarly import scholarly, ProxyGenerator
from scholarly import scholarly, ProxyGenerator, MaxTriesExceededException
from scholarly.data_types import Mandate
from scholarly.publication_parser import PublicationParser
import random
Expand All @@ -15,6 +15,12 @@
import pandas as pd
except ImportError:
pd = None
# free-proxy is an optional dependency. When it is missing, define a stand-in
# so that `except FreeProxyException` clauses in this module still resolve.
try:
    from fp.errors import FreeProxyException
except ImportError:
    class FreeProxyException(Exception):
        """Placeholder used when the free-proxy package is not installed."""


class TestLuminati(unittest.TestCase):
Expand Down Expand Up @@ -86,9 +92,12 @@ def setUpClass(cls):
scholarly.set_timeout(5)
scholarly.set_retries(5)

pg = ProxyGenerator()
pg.FreeProxies()
scholarly.use_proxy(pg, ProxyGenerator())
try:
pg = ProxyGenerator()
pg.FreeProxies()
scholarly.use_proxy(pg, ProxyGenerator())
except (FreeProxyException, MaxTriesExceededException) as e:
raise unittest.SkipTest(f"No working free proxy available: {e}") from e

# Try storing the file temporarily as `scholarly.csv` and delete it.
# If a file with that name already exists, generate a random name
Expand Down Expand Up @@ -596,12 +605,18 @@ def setUpClass(cls):
cls.connection_method = os.getenv("CONNECTION_METHOD")
else:
cls.connection_method = "none"
scholarly.use_proxy(None)
try:
scholarly.use_proxy(None)
except (FreeProxyException, MaxTriesExceededException) as e:
raise unittest.SkipTest(f"No working free proxy available: {e}") from e
return

# Use dual proxies for unit testing
secondary_proxy_generator = ProxyGenerator()
secondary_proxy_generator.FreeProxies()
try:
secondary_proxy_generator = ProxyGenerator()
secondary_proxy_generator.FreeProxies()
except (FreeProxyException, MaxTriesExceededException) as e:
raise unittest.SkipTest(f"No working free proxy available: {e}") from e

proxy_generator = ProxyGenerator()
if cls.connection_method == "tor":
Expand Down
Loading