Added fetch module
This commit is contained in:
326
mcpServer/modules/fetch/tests/test_server.py
Normal file
326
mcpServer/modules/fetch/tests/test_server.py
Normal file
@@ -0,0 +1,326 @@
|
||||
"""Tests for the fetch MCP server."""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, patch, MagicMock
|
||||
from mcp.shared.exceptions import McpError
|
||||
|
||||
from mcp_server_fetch.server import (
|
||||
extract_content_from_html,
|
||||
get_robots_txt_url,
|
||||
check_may_autonomously_fetch_url,
|
||||
fetch_url,
|
||||
DEFAULT_USER_AGENT_AUTONOMOUS,
|
||||
)
|
||||
|
||||
|
||||
class TestGetRobotsTxtUrl:
    """Verify that get_robots_txt_url maps a page URL to its robots.txt URL."""

    def test_simple_url(self):
        """A single-segment path is swapped for /robots.txt."""
        page_url = "https://example.com/page"
        expected = "https://example.com/robots.txt"
        assert get_robots_txt_url(page_url) == expected

    def test_url_with_path(self):
        """A deeply nested path is discarded entirely."""
        page_url = "https://example.com/some/deep/path/page.html"
        expected = "https://example.com/robots.txt"
        assert get_robots_txt_url(page_url) == expected

    def test_url_with_query_params(self):
        """Query parameters do not survive into the robots.txt URL."""
        page_url = "https://example.com/page?foo=bar&baz=qux"
        expected = "https://example.com/robots.txt"
        assert get_robots_txt_url(page_url) == expected

    def test_url_with_port(self):
        """An explicit port number is kept alongside the host."""
        page_url = "https://example.com:8080/page"
        expected = "https://example.com:8080/robots.txt"
        assert get_robots_txt_url(page_url) == expected

    def test_url_with_fragment(self):
        """A trailing #fragment is dropped."""
        page_url = "https://example.com/page#section"
        expected = "https://example.com/robots.txt"
        assert get_robots_txt_url(page_url) == expected

    def test_http_url(self):
        """The plain http scheme is preserved rather than upgraded."""
        page_url = "http://example.com/page"
        expected = "http://example.com/robots.txt"
        assert get_robots_txt_url(page_url) == expected
|
||||
|
||||
|
||||
class TestExtractContentFromHtml:
    """Verify the text that extract_content_from_html produces from HTML input."""

    def test_simple_html(self):
        """Paragraph text inside an <article> shows up in the output."""
        markup = """
        <html>
            <head><title>Test Page</title></head>
            <body>
                <article>
                    <h1>Hello World</h1>
                    <p>This is a test paragraph.</p>
                </article>
            </body>
        </html>
        """
        extracted = extract_content_from_html(markup)
        # Only the paragraph text is asserted: readabilipy may keep or drop
        # other parts of the page depending on the content.
        assert "test paragraph" in extracted

    def test_html_with_links(self):
        """The visible text of an anchor is retained in the output."""
        markup = """
        <html>
            <body>
                <article>
                    <p>Visit <a href="https://example.com">Example</a> for more.</p>
                </article>
            </body>
        </html>
        """
        extracted = extract_content_from_html(markup)
        assert "Example" in extracted

    def test_empty_content_returns_error(self):
        """An empty document yields output containing an <error> marker."""
        extracted = extract_content_from_html("")
        assert "<error>" in extracted
|
||||
|
||||
|
||||
class TestCheckMayAutonomouslyFetchUrl:
    """Verify how check_may_autonomously_fetch_url reacts to robots.txt responses.

    Every test replaces ``httpx.AsyncClient`` with a mock whose ``get`` returns
    a canned response, so no network traffic takes place.
    """

    @staticmethod
    def _wire_async_client(client_cls, response):
        """Make the patched client class yield a session whose get() returns *response*."""
        session = AsyncMock()
        session.get = AsyncMock(return_value=response)
        # The patched class is used as ``async with AsyncClient(...) as client``,
        # so the async context-manager hooks live on its return value.
        context = client_cls.return_value
        context.__aenter__ = AsyncMock(return_value=session)
        context.__aexit__ = AsyncMock(return_value=None)
        return session

    @pytest.mark.asyncio
    async def test_allows_when_robots_txt_404(self):
        """A 404 for robots.txt does not block the fetch."""
        robots_reply = MagicMock(status_code=404)

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_async_client(client_cls, robots_reply)

            # Completing without an exception is the success condition.
            await check_may_autonomously_fetch_url(
                "https://example.com/page",
                DEFAULT_USER_AGENT_AUTONOMOUS
            )

    @pytest.mark.asyncio
    async def test_blocks_when_robots_txt_401(self):
        """A 401 for robots.txt is reported as an McpError."""
        robots_reply = MagicMock(status_code=401)

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_async_client(client_cls, robots_reply)

            with pytest.raises(McpError):
                await check_may_autonomously_fetch_url(
                    "https://example.com/page",
                    DEFAULT_USER_AGENT_AUTONOMOUS
                )

    @pytest.mark.asyncio
    async def test_blocks_when_robots_txt_403(self):
        """A 403 for robots.txt is reported as an McpError."""
        robots_reply = MagicMock(status_code=403)

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_async_client(client_cls, robots_reply)

            with pytest.raises(McpError):
                await check_may_autonomously_fetch_url(
                    "https://example.com/page",
                    DEFAULT_USER_AGENT_AUTONOMOUS
                )

    @pytest.mark.asyncio
    async def test_allows_when_robots_txt_allows_all(self):
        """A robots.txt that allows everything does not block the fetch."""
        robots_reply = MagicMock(
            status_code=200,
            text="User-agent: *\nAllow: /",
        )

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_async_client(client_cls, robots_reply)

            # Completing without an exception is the success condition.
            await check_may_autonomously_fetch_url(
                "https://example.com/page",
                DEFAULT_USER_AGENT_AUTONOMOUS
            )

    @pytest.mark.asyncio
    async def test_blocks_when_robots_txt_disallows_all(self):
        """A robots.txt that disallows everything is reported as an McpError."""
        robots_reply = MagicMock(
            status_code=200,
            text="User-agent: *\nDisallow: /",
        )

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_async_client(client_cls, robots_reply)

            with pytest.raises(McpError):
                await check_may_autonomously_fetch_url(
                    "https://example.com/page",
                    DEFAULT_USER_AGENT_AUTONOMOUS
                )
|
||||
|
||||
|
||||
class TestFetchUrl:
    """Verify fetch_url against mocked HTTP responses.

    Every test replaces ``httpx.AsyncClient`` with a mock whose ``get`` returns
    a canned response, so no network traffic takes place.
    """

    @staticmethod
    def _wire_async_client(client_cls, response):
        """Make the patched client class yield a session whose get() returns *response*."""
        session = AsyncMock()
        session.get = AsyncMock(return_value=response)
        # The patched class is used as ``async with AsyncClient(...) as client``,
        # so the async context-manager hooks live on its return value.
        context = client_cls.return_value
        context.__aenter__ = AsyncMock(return_value=session)
        context.__aexit__ = AsyncMock(return_value=None)
        return session

    @pytest.mark.asyncio
    async def test_fetch_html_page(self):
        """An HTML response comes back as a string with an empty prefix."""
        page_html = """
        <html>
            <body>
                <article>
                    <h1>Test Page</h1>
                    <p>Hello World</p>
                </article>
            </body>
        </html>
        """
        reply = MagicMock(
            status_code=200,
            text=page_html,
            headers={"content-type": "text/html"},
        )

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_async_client(client_cls, reply)

            body, note = await fetch_url(
                "https://example.com/page",
                DEFAULT_USER_AGENT_AUTONOMOUS
            )

            # The HTML goes through processing, so only the result type and
            # the empty prefix are pinned here.
            assert isinstance(body, str)
            assert note == ""

    @pytest.mark.asyncio
    async def test_fetch_html_page_raw(self):
        """With force_raw=True the original HTML is returned untouched."""
        html_content = "<html><body><h1>Test</h1></body></html>"
        reply = MagicMock(
            status_code=200,
            text=html_content,
            headers={"content-type": "text/html"},
        )

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_async_client(client_cls, reply)

            body, note = await fetch_url(
                "https://example.com/page",
                DEFAULT_USER_AGENT_AUTONOMOUS,
                force_raw=True
            )

            assert body == html_content
            assert "cannot be simplified" in note

    @pytest.mark.asyncio
    async def test_fetch_json_returns_raw(self):
        """A JSON response is passed through verbatim with an explanatory prefix."""
        json_content = '{"key": "value"}'
        reply = MagicMock(
            status_code=200,
            text=json_content,
            headers={"content-type": "application/json"},
        )

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_async_client(client_cls, reply)

            body, note = await fetch_url(
                "https://api.example.com/data",
                DEFAULT_USER_AGENT_AUTONOMOUS
            )

            assert body == json_content
            assert "cannot be simplified" in note

    @pytest.mark.asyncio
    async def test_fetch_404_raises_error(self):
        """A 404 status from the target URL surfaces as an McpError."""
        reply = MagicMock(status_code=404)

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_async_client(client_cls, reply)

            with pytest.raises(McpError):
                await fetch_url(
                    "https://example.com/notfound",
                    DEFAULT_USER_AGENT_AUTONOMOUS
                )

    @pytest.mark.asyncio
    async def test_fetch_500_raises_error(self):
        """A 500 status from the target URL surfaces as an McpError."""
        reply = MagicMock(status_code=500)

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_async_client(client_cls, reply)

            with pytest.raises(McpError):
                await fetch_url(
                    "https://example.com/error",
                    DEFAULT_USER_AGENT_AUTONOMOUS
                )

    @pytest.mark.asyncio
    async def test_fetch_with_proxy(self):
        """The proxy_url argument is forwarded to the HTTP client constructor."""
        reply = MagicMock(
            status_code=200,
            text='{"data": "test"}',
            headers={"content-type": "application/json"},
        )

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_async_client(client_cls, reply)

            await fetch_url(
                "https://example.com/data",
                DEFAULT_USER_AGENT_AUTONOMOUS,
                proxy_url="http://proxy.example.com:8080"
            )

            # The client class must be constructed exactly once, with the
            # proxy passed as the ``proxies`` keyword and nothing else.
            client_cls.assert_called_once_with(proxies="http://proxy.example.com:8080")
|
||||
Reference in New Issue
Block a user