Using Proxies in Python with aiohttp
aiohttp and Proxies
aiohttp is the most popular asynchronous HTTP library for Python. It allows sending thousands of concurrent requests, making it ideal for web scraping and automation. Proxy support in aiohttp is a key feature for large-scale tasks.
Basic Proxy Usage
HTTP Proxies
The simplest way to use an HTTP proxy in aiohttp is with the proxy parameter in the request method:
import aiohttp
import asyncio


async def fetch_with_proxy():
    """Request httpbin.org/ip through an HTTP proxy and print the JSON reply."""
    # Placeholder address -- replace with a real proxy host and port.
    proxy = "http://proxy_ip:port"
    async with aiohttp.ClientSession() as session:
        # The per-request `proxy` argument routes only this request
        # through the proxy; other requests on the session are unaffected.
        async with session.get(
            "https://httpbin.org/ip",
            proxy=proxy,
        ) as response:
            data = await response.json()
            print(data)


asyncio.run(fetch_with_proxy())
Proxies with Authentication
For proxies with a username and password, use BasicAuth:
import aiohttp
import asyncio


async def fetch_with_auth_proxy():
    """Request httpbin.org/ip through an authenticated HTTP proxy."""
    proxy = "http://proxy_ip:port"
    # BasicAuth sends the credentials in the Proxy-Authorization header.
    proxy_auth = aiohttp.BasicAuth("username", "password")
    async with aiohttp.ClientSession() as session:
        async with session.get(
            "https://httpbin.org/ip",
            proxy=proxy,
            proxy_auth=proxy_auth,
        ) as response:
            data = await response.json()
            print(data)


asyncio.run(fetch_with_auth_proxy())
An alternative format is credentials in the URL:
proxy = "http://username:password@proxy_ip:port"
SOCKS5 Proxies
Installing aiohttp-socks
aiohttp does not natively support SOCKS. Install aiohttp-socks:
pip install aiohttp-socks
Usage
import aiohttp
import asyncio  # needed for asyncio.run below (missing in the original snippet)
from aiohttp_socks import ProxyConnector


async def fetch_with_socks():
    """Fetch httpbin.org/ip through a SOCKS5 proxy using aiohttp-socks."""
    # ProxyConnector performs the SOCKS handshake; credentials go in the URL.
    connector = ProxyConnector.from_url("socks5://user:pass@proxy_ip:port")
    async with aiohttp.ClientSession(connector=connector) as session:
        async with session.get("https://httpbin.org/ip") as response:
            data = await response.json()
            print(data)


asyncio.run(fetch_with_socks())
Supported Protocols
ProxyConnector supports:
- socks5://
- socks4://
- http://
- https://
Proxy Rotation
Simple Rotation
import aiohttp
import asyncio
import random
# Pool of candidate proxies; each request picks one at random.
PROXIES = [
    "http://user:pass@proxy1:port",
    "http://user:pass@proxy2:port",
    "http://user:pass@proxy3:port",
]


async def fetch_with_rotation(url):
    """Fetch *url* through a randomly chosen proxy and return the body text."""
    proxy = random.choice(PROXIES)
    async with aiohttp.ClientSession() as session:
        async with session.get(url, proxy=proxy) as response:
            return await response.text()


async def main():
    """Fire ten concurrent fetches and report each result or error."""
    urls = ["https://example.com"] * 10
    tasks = [fetch_with_rotation(url) for url in urls]
    # return_exceptions=True keeps one failed request from cancelling the rest.
    results = await asyncio.gather(*tasks, return_exceptions=True)
    for r in results:
        if isinstance(r, Exception):
            print(f"Error: {r}")
        else:
            print(f"OK: {len(r)} bytes")


asyncio.run(main())
Rotation with Failed Proxy Exclusion
import aiohttp
import asyncio
from collections import deque
class ProxyRotator:
    """Round-robin proxy pool that skips proxies marked as failed."""

    def __init__(self, proxies):
        # deque gives O(1) rotation for round-robin selection.
        self.proxies = deque(proxies)
        self.failed = set()

    def get_proxy(self):
        """Return the next non-failed proxy, advancing the rotation.

        Raises:
            Exception: if every proxy in the pool is marked as failed.
        """
        for _ in range(len(self.proxies)):
            proxy = self.proxies[0]
            self.proxies.rotate(-1)  # move the current proxy to the back
            if proxy not in self.failed:
                return proxy
        raise Exception("All proxies failed")

    def mark_failed(self, proxy):
        """Exclude *proxy* from future get_proxy() results."""
        self.failed.add(proxy)

    def mark_success(self, proxy):
        """Re-admit a proxy that previously failed but now works."""
        self.failed.discard(proxy)
async def fetch(session, url, rotator, retries=3):
    """Fetch *url* via rotating proxies, retrying up to *retries* times.

    Returns the response body on HTTP 200; None if every attempt fails.
    """
    for _ in range(retries):
        proxy = rotator.get_proxy()
        try:
            async with session.get(
                url,
                proxy=proxy,
                timeout=aiohttp.ClientTimeout(total=10),
            ) as resp:
                if resp.status == 200:
                    rotator.mark_success(proxy)
                    return await resp.text()
                elif resp.status == 403:
                    # 403 usually means the proxy's IP is banned by the target.
                    rotator.mark_failed(proxy)
        except Exception:
            # Connection/timeout errors: blacklist this proxy and retry.
            rotator.mark_failed(proxy)
    return None
Concurrent Requests with Limits
Semaphore for Concurrency Control
import aiohttp
import asyncio
async def fetch(session, url, proxy, semaphore):
    """Fetch *url* through *proxy*; returns body text or None on any failure."""
    async with semaphore:  # caps the number of in-flight requests
        try:
            async with session.get(
                url,
                proxy=proxy,
                timeout=aiohttp.ClientTimeout(total=15),
            ) as resp:
                return await resp.text()
        except Exception:
            # Best-effort example: any failure just yields None for this URL.
            return None
async def main():
    """Fetch 100 pages through one proxy, at most 10 requests in flight."""
    urls = [f"https://example.com/page/{i}" for i in range(100)]
    proxy = "http://user:pass@proxy:port"
    semaphore = asyncio.Semaphore(10)  # max 10 concurrent
    async with aiohttp.ClientSession() as session:
        tasks = [fetch(session, url, proxy, semaphore) for url in urls]
        results = await asyncio.gather(*tasks)
    success = sum(1 for r in results if r)
    print(f"Success: {success}/{len(urls)}")


asyncio.run(main())
Session Configuration
Timeout
# Per-phase timeouts: `total` caps the whole request, `connect` the
# connection phase, and `sock_read` each individual socket read.
timeout = aiohttp.ClientTimeout(
    total=30,      # total timeout
    connect=10,    # connection timeout
    sock_read=10,  # socket read timeout
)
session = aiohttp.ClientSession(timeout=timeout)
Headers
# Browser-like default headers, applied to every request made by the session.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    "Accept": "text/html,application/xhtml+xml",
    "Accept-Language": "en-US,en;q=0.9",
}
session = aiohttp.ClientSession(headers=headers)
SSL
# Disabling SSL verification (for testing)
# NOTE: never do this in production -- it defeats TLS entirely.
import ssl

ssl_context = ssl.create_default_context()
ssl_context.check_hostname = False       # skip hostname matching
ssl_context.verify_mode = ssl.CERT_NONE  # accept any certificate
session = aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=ssl_context))
Error Handling
import aiohttp
import asyncio  # required for asyncio.TimeoutError (missing in the original snippet)


async def safe_fetch(session, url, proxy):
    """Fetch *url* through *proxy*, mapping common failures to messages.

    Returns the body text on HTTP 200, otherwise None.
    """
    try:
        async with session.get(url, proxy=proxy) as resp:
            if resp.status == 200:
                return await resp.text()
            elif resp.status == 407:
                print("Proxy authentication required")
            elif resp.status == 403:
                print("Forbidden - proxy might be banned")
            elif resp.status == 429:
                print("Rate limited - slow down")
            return None
    # Order matters: ClientProxyConnectionError is a subclass of
    # ClientConnectorError, so the more specific handler comes first.
    except aiohttp.ClientProxyConnectionError:
        print("Cannot connect to proxy")
    except aiohttp.ClientConnectorError:
        print("Connection error")
    except asyncio.TimeoutError:
        print("Request timed out")
    except Exception as e:
        print(f"Unexpected error: {e}")
    return None
aiohttp vs requests
| Parameter | aiohttp | requests |
|---|---|---|
| Asynchronicity | Yes (asyncio) | No (synchronous) |
| Concurrency | Thousands | Limited by threads |
| Speed | High | Medium |
| Proxies | HTTP, SOCKS (via aiohttp-socks) | HTTP, SOCKS (via requests[socks]) |
| Complexity | Higher (async/await) | Simple |
| Memory | More efficient | Higher consumption |
Conclusion
aiohttp is the best choice for asynchronous proxy usage in Python. Its support for HTTP and SOCKS5 (via aiohttp-socks), proxy rotation, concurrency control, and error handling make it a powerful tool for large-scale web scraping and automation.