Async added and removed

This commit is contained in:
emdee 2022-11-27 01:10:18 +00:00
parent 08626942d3
commit 204a6adc48
4 changed files with 483 additions and 258 deletions

View file

@ -7,10 +7,15 @@ import datetime
import os
import re
import sys
import ipaddress
import warnings
import urllib3.util
from urllib3.util import parse_url as urlparse
from stem.control import Controller
# from stem.util.tor_tools import *
from urllib3.util import parse_url as urlparse
try:
# unbound is not on pypi
@ -20,11 +25,13 @@ except:
global LOG
import logging
import warnings
warnings.filterwarnings('ignore')
LOG = logging.getLogger()
logging.getLogger("urllib3").setLevel(logging.INFO)
# import urllib3.contrib.pyopenssl
# urllib3.contrib.pyopenssl.inject_into_urllib3()
# download this python library from
# https://github.com/erans/torcontactinfoparser
# sys.path.append('/home/....')
@ -211,7 +218,7 @@ def find_validation_candidates(controller,
result[domain] = {prooftype: [fingerprint]}
return result
def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050, content_type='text/plain', session=None):
import requests
# socks proxy used for outbound web requests (for validation of proofs)
proxy = {'https': "socks5h://{host}:{port}"}
@ -225,6 +232,7 @@ def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
# urllib3.connection WARNING Certificate did not match expected hostname:
head = requests.head(uri, timeout=timeout, proxies=proxy, headers=headers)
except Exception as e:
LOG.exception(f"{e}")
raise TrustorError(f"HTTP HEAD request failed for {uri} {e}")
if head.status_code >= 300:
@ -234,15 +242,15 @@ def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
if not os.path.exists(sCAfile):
raise TrustorError(f"File not found CAfile {sCAfile}")
if session is None: session = requests.sessions.Session()
try:
with requests.sessions.Session() as session:
oReqResp = session.request(method="get", url=uri,
proxies=proxy,
timeout=timeout,
headers=headers,
allow_redirects=False,
verify=True
)
oReqResp = session.request(method="get", url=uri,
proxies=proxy,
timeout=timeout,
headers=headers,
allow_redirects=False,
verify=True
)
except:
LOG.warn("HTTP GET request failed for %s" % uri)
raise
@ -257,13 +265,61 @@ def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
raise TrustorError(f'Redirect detected {uri} vs %s (final)' % (oReqResp.url))
return oReqResp
logging.getLogger("urllib3").setLevel(logging.INFO)
# import urllib3.contrib.pyopenssl
# urllib3.contrib.pyopenssl.inject_into_urllib3()
# There's no point in using asyncio because of duplicate urls in the tasks
async def oDownloadUrlHttpx(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050, content_type='text/plain'):
    """Fetch *uri* with httpx, optionally through a SOCKS5 proxy.

    A HEAD request is issued first so that error statuses and unexpected
    content types are rejected before the body is downloaded; a GET follows.
    Redirects are never followed.

    Args:
        uri: URL to download.
        sCAfile: path of the CA bundle used to verify the server certificate.
        timeout: timeout passed to both the HEAD and the GET request.
        host: SOCKS5 proxy host; a falsy host or port disables the proxy.
        port: SOCKS5 proxy port.
        content_type: required prefix of the Content-Type response header;
            a falsy value disables the Content-Type checks.

    Returns:
        The httpx response object of the GET request.

    Raises:
        TrustorError: the CA file is missing, the HEAD request failed, a
            status or Content-Type check failed, or a redirect was detected.
        Exception: whatever the GET request raised is logged and re-raised
            unchanged (including asyncio.CancelledError).
    """
    import asyncio

    import httpx

    # Checked up front: the CA bundle is handed to the client constructor,
    # and a missing file should surface as a TrustorError.
    if not os.path.exists(sCAfile):
        raise TrustorError(f"File not found CAfile {sCAfile}")

    # socks proxy used for outbound web requests (for validation of proofs)
    proxy = f"socks5://{host}:{port}" if host and port else None

    # we use this UA string when connecting to webservers to fetch rsa-fingerprint.txt proof files
    # https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/#uri-rsa
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0'}

    LOG.debug("fetching %s....", uri)
    # https://www.python-httpx.org/advanced/
    # `verify` is a client-level option in httpx; it is not accepted by
    # client.get()/client.head().
    # NOTE(review): `proxies=` is kept as in the original call; newer httpx
    # releases renamed it to `proxy=` - confirm against the pinned version.
    async with httpx.AsyncClient(proxies=proxy, verify=sCAfile) as client:
        try:
            head = await client.head(uri, timeout=timeout, headers=headers)
        except Exception as e:
            LOG.exception(f"{e}")
            raise TrustorError(f"HTTP HEAD request failed for {uri} {e}") from e

        if head.status_code >= 300:
            raise TrustorError(f"HTTP Errorcode {head.status_code}")
        # .get() so that a response without the header fails the check
        # instead of raising KeyError.
        if content_type and not head.headers.get('Content-Type', '').startswith(content_type):
            raise TrustorError(f"HTTP Content-Type != {content_type}")

        try:
            oReqResp = await client.get(uri,
                                        timeout=timeout,
                                        headers=headers,
                                        follow_redirects=False)
        except (asyncio.CancelledError, Exception) as e:
            # CancelledError is not an Exception subclass on Python >= 3.8,
            # so it is listed explicitly to be logged as well.
            LOG.warning("HTTP GET request failed for %s %s", uri, e)
            raise

    if oReqResp.status_code != 200:
        LOG.warning(f"HTTP Errorcode {oReqResp.status_code}")
        raise TrustorError(f"HTTP Errorcode {oReqResp.status_code}")
    if content_type and not oReqResp.headers.get('Content-Type', '').startswith(content_type):
        LOG.warning(f"HTTP Content-Type != {content_type}")
        raise TrustorError(f"HTTP Content-Type != {content_type}")

    # check for redirects (not allowed as per spec)
    if oReqResp.url != uri:
        LOG.error(f'Redirect detected {uri} vs {oReqResp.url} (final)')
        raise TrustorError(f'Redirect detected {uri} vs {oReqResp.url} (final)')
    return oReqResp
def ballow_subdomain_matching(hostname, dnsnames):
@ -276,7 +332,6 @@ def ballow_subdomain_matching(hostname, dnsnames):
from urllib3.util.ssl_match_hostname import (CertificateError, _dnsname_match,
_ipaddress_match)
def my_match_hostname(cert, hostname):
"""Verify that *cert* (in decoded format as returned by
SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
@ -370,13 +425,14 @@ urllib3.connection._match_hostname = _my_match_hostname
from urllib3.contrib.socks import SOCKSProxyManager
# from urllib3 import Retry
def oDownloadUrlUrllib3(uri, sCAfile,
timeout=30,
host='127.0.0.1',
port=9050,
content_type=''):
def oDownloadUrlUrllib3Socks(uri,
sCAfile,
timeout=30,
host='127.0.0.1',
port=9050,
session=None,
content_type='text/plain'):
"""Theres no need to use requests here and it
adds too many layers on the SSL to be able to get at things
"""