fix(client,exporter): handle HTTP 429 retry and sanitize JSON
_get_with_retry now retries on HTTP 429 responses, respecting the Retry-After header when present. The exporter sanitizes control characters (0x00-0x1F except \n, \r and \t) in text fields before JSON serialization.

fixes #11
fixes #12

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -33,19 +33,42 @@ class GiteaClient:
|
||||
self.session.headers["Authorization"] = f"token {token}"
|
||||
|
||||
def _get_with_retry(self, url: str, params: dict | None = None) -> requests.Response:
    """GET with automatic retry on timeout and on rate limiting (HTTP 429).

    Makes at most ``_MAX_RETRIES + 1`` attempts. Between two attempts it
    sleeps for a linear backoff of ``_RETRY_DELAY * (attempt + 1)`` seconds.

    On a 429 (Too Many Requests) response, the ``Retry-After`` header is
    used as the delay when it holds a finite, non-negative number of
    seconds. When the header is absent or unusable (HTTP-date form,
    malformed text, negative or non-finite number), the linear backoff is
    used instead, so a bad header can never abort the retry loop.

    Any response whose status is not 429 is returned as-is, whatever its
    status code; no other status is retried here.

    Raises:
        requests.Timeout: the last attempt timed out.
        requests.HTTPError: the last attempt was still answered with 429.
    """
    last_exc: requests.Timeout | None = None
    last_resp: requests.Response | None = None
    for attempt in range(self._MAX_RETRIES + 1):
        backoff = self._RETRY_DELAY * (attempt + 1)
        try:
            resp = self.session.get(url, params=params, timeout=self.timeout)
        except requests.Timeout as exc:
            # Keep only the most recent failure so that the exception raised
            # after exhaustion reflects what happened on the last attempt.
            last_exc, last_resp = exc, None
            if attempt < self._MAX_RETRIES:
                time.sleep(backoff)
            continue

        if resp.status_code != 429:
            return resp

        last_exc, last_resp = None, resp
        if attempt < self._MAX_RETRIES:
            delay = backoff
            retry_after = resp.headers.get("Retry-After")
            if retry_after is not None:
                try:
                    requested = float(retry_after)
                except (TypeError, ValueError):
                    # Retry-After may also be an HTTP-date; that form is not
                    # interpreted here and falls back to the linear backoff.
                    requested = None
                # The chained comparison is False for NaN and for +/-inf,
                # both of which time.sleep() would reject.
                if requested is not None and 0 <= requested < float("inf"):
                    delay = requested
            time.sleep(delay)

    if last_resp is not None:
        last_resp.raise_for_status()
        # Defensive: guarantees the documented HTTPError even if
        # raise_for_status() returned without raising.
        raise requests.HTTPError("429 Too Many Requests", response=last_resp)
    raise last_exc  # type: ignore[misc]
|
||||
|
||||
def _get_paginated(self, endpoint: str, params: dict | None = None) -> list[dict]:
|
||||
|
||||
@@ -3,18 +3,38 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from dataclasses import asdict
|
||||
|
||||
from gitea_dashboard.collector import RepoData
|
||||
|
||||
# ASCII control characters (0x00-0x1F) except \t (0x09), \n (0x0A), \r (0x0D).
_CONTROL_CHAR_RE = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]")


def _sanitize_control_chars(text: str) -> str:
    """Remove ASCII control characters (0x00-0x1F) except \\n, \\r and \\t.

    Such characters can come from Gitea repository descriptions and cause
    JSON errors ('Invalid control character').
    """
    return _CONTROL_CHAR_RE.sub("", text)
|
||||
|
||||
|
||||
def repos_to_dicts(repos: list[RepoData]) -> list[dict]:
    """Convert a list of RepoData into a list of serializable dicts.

    Each dict holds all the data of its RepoData, ready for json.dumps().
    The text fields (name, full_name, description) are sanitized to strip
    control characters; values that are not strings are left untouched.
    """
    result = []
    for repo in repos:
        d = asdict(repo)
        for field in ("name", "full_name", "description"):
            value = d.get(field)
            # Only strings are sanitized; a missing or non-string value is
            # kept exactly as asdict() produced it.
            if isinstance(value, str):
                d[field] = _sanitize_control_chars(value)
        result.append(d)
    return result
|
||||
|
||||
|
||||
def export_json(repos: list[RepoData], indent: int = 2) -> str:
|
||||
|
||||
@@ -210,6 +210,83 @@ class TestGetWithRetry:
|
||||
mock_sleep.assert_any_call(2.0)
|
||||
|
||||
|
||||
class TestGetWithRetry429:
    """Test _get_with_retry method (retry on HTTP 429 rate limiting)."""

    def _make_client(self):
        """Build a client pointing at a dummy Gitea URL with a dummy token."""
        return GiteaClient("http://gitea.local:3000", "tok")

    def _make_429_response(self, retry_after=None):
        """Create a mock 429 response, with a Retry-After header if given."""
        resp = MagicMock()
        resp.status_code = 429
        resp.headers = {"Retry-After": retry_after} if retry_after is not None else {}
        resp.raise_for_status.side_effect = requests.HTTPError(
            "429 Too Many Requests", response=resp
        )
        return resp

    def _make_200_response(self):
        """Create a mock 200 response."""
        resp = MagicMock()
        resp.status_code = 200
        return resp

    @patch("time.sleep")
    def test_retry_on_429_with_retry_after(self, mock_sleep):
        """429 with Retry-After header: sleeps for the indicated duration, then succeeds."""
        client = self._make_client()
        resp_429 = self._make_429_response(retry_after="2")
        resp_200 = self._make_200_response()

        with patch.object(client.session, "get", side_effect=[resp_429, resp_200]):
            result = client._get_with_retry("http://gitea.local:3000/api/v1/test")

        assert result.status_code == 200
        mock_sleep.assert_called_once_with(2.0)

    @patch("time.sleep")
    def test_retry_on_429_without_retry_after(self, mock_sleep):
        """429 without Retry-After header: uses linear backoff (1.0s for first retry)."""
        client = self._make_client()
        resp_429 = self._make_429_response()
        resp_200 = self._make_200_response()

        with patch.object(client.session, "get", side_effect=[resp_429, resp_200]):
            result = client._get_with_retry("http://gitea.local:3000/api/v1/test")

        assert result.status_code == 200
        mock_sleep.assert_called_once_with(1.0)

    @patch("time.sleep")
    def test_retry_on_429_exhausted(self, mock_sleep):
        """3 consecutive 429 responses: raises HTTPError after exhausting retries."""
        client = self._make_client()
        resp_429 = self._make_429_response()

        with patch.object(client.session, "get", return_value=resp_429):
            with pytest.raises(requests.HTTPError):
                client._get_with_retry("http://gitea.local:3000/api/v1/test")

        # No sleep after the final attempt: 3 attempts -> 2 sleeps.
        assert mock_sleep.call_count == 2

    @patch("time.sleep")
    def test_retry_on_429_then_timeout(self, mock_sleep):
        """429 followed by Timeout: both retry types handled in same loop."""
        client = self._make_client()
        resp_429 = self._make_429_response()
        resp_200 = self._make_200_response()

        with patch.object(
            client.session,
            "get",
            side_effect=[resp_429, requests.Timeout("timeout"), resp_200],
        ):
            result = client._get_with_retry("http://gitea.local:3000/api/v1/test")

        assert result.status_code == 200
        assert mock_sleep.call_count == 2
|
||||
|
||||
|
||||
class TestGetLatestCommit:
|
||||
"""Test get_latest_commit method."""
|
||||
|
||||
|
||||
@@ -56,6 +56,43 @@ class TestReposToDicts:
|
||||
assert field in d, f"Missing field: {field}"
|
||||
|
||||
|
||||
class TestSanitizeControlChars:
    """Test control character sanitization in export."""

    def test_export_json_sanitizes_control_chars(self):
        """Description with control chars (0x00, 0x01, 0x02) produces valid JSON without them."""
        repo = _make_repo(description="hello\x00\x01\x02world")
        output = export_json([repo])

        parsed = json.loads(output)
        assert parsed[0]["description"] == "helloworld"

    def test_export_json_preserves_newlines_tabs(self):
        """Newlines and tabs are preserved in JSON export (they are valid JSON escapes)."""
        repo = _make_repo(description="line1\nline2\ttab")
        output = export_json([repo])

        parsed = json.loads(output)
        assert parsed[0]["description"] == "line1\nline2\ttab"

    def test_export_json_unicode_safe(self):
        """Description with emojis and accents produces valid JSON."""
        # The fixture carries real accented characters so the test covers
        # what its docstring announces, not only emojis.
        repo = _make_repo(description="Projet avec accents : é, à et emojis 🚀🎉")
        output = export_json([repo])

        parsed = json.loads(output)
        assert "🚀" in parsed[0]["description"]
        assert "é" in parsed[0]["description"]
        assert "accents" in parsed[0]["description"]

    def test_sanitize_name_and_full_name(self):
        """Control chars in name and full_name fields are also sanitized."""
        repo = _make_repo(name="test\x00repo", full_name="admin/test\x01repo")
        result = repos_to_dicts([repo])

        assert result[0]["name"] == "testrepo"
        assert result[0]["full_name"] == "admin/testrepo"
|
||||
|
||||
|
||||
class TestExportJson:
|
||||
"""Test export_json function."""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user