fix(client,exporter): handle HTTP 429 retry and sanitize JSON

_get_with_retry now retries on HTTP 429 responses, respecting the
Retry-After header when present. exporter sanitizes control characters
(0x00-0x1F except \n \r \t) in text fields before JSON serialization.

fixes #11
fixes #12

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
sylvain
2026-03-12 19:15:25 +01:00
parent 9783389bfb
commit b40dea32f4
4 changed files with 163 additions and 6 deletions

View File

@@ -33,19 +33,42 @@ class GiteaClient:
self.session.headers["Authorization"] = f"token {token}" self.session.headers["Authorization"] = f"token {token}"
def _get_with_retry(self, url: str, params: dict | None = None) -> requests.Response:
    """GET with automatic retry on timeouts and on HTTP 429 rate limiting.

    Performs up to ``_MAX_RETRIES + 1`` attempts. After a timeout the method
    sleeps for a linear backoff of ``_RETRY_DELAY * (attempt + 1)`` seconds.
    After an HTTP 429 (Too Many Requests) it honours the ``Retry-After``
    header, which may be a number of seconds or an HTTP-date; when the
    header is absent or unusable the same linear backoff is applied.

    Any response whose status is not 429 is returned as-is (no status check).

    Raises:
        requests.HTTPError: retries exhausted and at least one attempt
            returned 429 (raised through ``raise_for_status`` on the last
            429 response).
        requests.Timeout: retries exhausted and every attempt timed out.
    """
    last_exc: requests.Timeout | None = None
    last_resp: requests.Response | None = None
    for attempt in range(self._MAX_RETRIES + 1):
        backoff = self._RETRY_DELAY * (attempt + 1)
        retries_left = attempt < self._MAX_RETRIES
        try:
            resp = self.session.get(url, params=params, timeout=self.timeout)
        except requests.Timeout as exc:
            last_exc = exc
            if retries_left:
                time.sleep(backoff)
            continue
        if resp.status_code != 429:
            return resp
        last_resp = resp
        if retries_left:
            # A malformed Retry-After must never abort the retry loop.
            time.sleep(self._retry_after_delay(resp.headers.get("Retry-After"), backoff))
    if last_resp is not None:
        last_resp.raise_for_status()
    raise last_exc  # type: ignore[misc]

@staticmethod
def _retry_after_delay(header_value, fallback: float) -> float:
    """Convert a ``Retry-After`` header value into a sleep duration in seconds.

    Accepts delay-seconds ("120") or an HTTP-date. Returns *fallback* when
    the value is missing, unparseable, NaN or infinite; negative delays
    (including dates in the past) are clamped to 0.0.
    """
    # Function-scope imports keep this helper self-contained.
    import math
    from datetime import datetime, timezone
    from email.utils import parsedate_to_datetime

    if header_value is None:
        return fallback
    try:
        delay = float(header_value)
    except (TypeError, ValueError):
        try:
            when = parsedate_to_datetime(header_value)
        except (TypeError, ValueError):
            return fallback
        if when.tzinfo is None:
            # No usable zone information: interpret the timestamp as UTC.
            when = when.replace(tzinfo=timezone.utc)
        delay = (when - datetime.now(timezone.utc)).total_seconds()
    if not math.isfinite(delay):
        # time.sleep rejects NaN/inf; use the standard backoff instead.
        return fallback
    return max(delay, 0.0)
def _get_paginated(self, endpoint: str, params: dict | None = None) -> list[dict]: def _get_paginated(self, endpoint: str, params: dict | None = None) -> list[dict]:

View File

@@ -3,18 +3,38 @@
from __future__ import annotations from __future__ import annotations
import json import json
import re
from dataclasses import asdict from dataclasses import asdict
from gitea_dashboard.collector import RepoData from gitea_dashboard.collector import RepoData
# ASCII control characters (0x00-0x1F) except \t (0x09), \n (0x0A) and \r (0x0D).
_CONTROL_CHAR_RE = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]")


def _sanitize_control_chars(text: str) -> str:
    r"""Return *text* with ASCII control characters removed, keeping \n, \r and \t.

    Such characters can come from Gitea repository descriptions and cause
    JSON errors ('Invalid control character').
    """
    cleaned = re.sub(_CONTROL_CHAR_RE, "", text)
    return cleaned
def repos_to_dicts(repos: list[RepoData]) -> list[dict]:
    """Convert a list of RepoData into a list of serialisable dicts.

    The text fields (name, full_name, description) are sanitized to drop
    control characters that are invalid in JSON. Fields that are missing
    or not strings are left untouched.
    """
    text_fields = ("name", "full_name", "description")

    def _clean(record: dict) -> dict:
        # Sanitize in place: only the listed keys, and only string values.
        for key in text_fields:
            value = record.get(key)
            if isinstance(value, str):
                record[key] = _sanitize_control_chars(value)
        return record

    return [_clean(asdict(repo)) for repo in repos]
def export_json(repos: list[RepoData], indent: int = 2) -> str: def export_json(repos: list[RepoData], indent: int = 2) -> str:

View File

@@ -210,6 +210,83 @@ class TestGetWithRetry:
mock_sleep.assert_any_call(2.0) mock_sleep.assert_any_call(2.0)
class TestGetWithRetry429:
    """Exercise _get_with_retry when the server answers HTTP 429 (rate limiting)."""

    # Endpoint used by every scenario; the session is mocked so it is never contacted.
    _TEST_URL = "http://gitea.local:3000/api/v1/test"

    def _make_client(self):
        """Build a client pointing at a dummy Gitea instance."""
        return GiteaClient("http://gitea.local:3000", "tok")

    def _make_429_response(self, retry_after=None):
        """Build a mock 429 response, with a Retry-After header when one is given."""
        headers = {}
        if retry_after is not None:
            headers["Retry-After"] = retry_after
        resp = MagicMock()
        resp.status_code = 429
        resp.headers = headers
        # raise_for_status must behave like a real 429 response.
        resp.raise_for_status.side_effect = requests.HTTPError(
            "429 Too Many Requests", response=resp
        )
        return resp

    def _make_200_response(self):
        """Build a mock successful response."""
        resp = MagicMock()
        resp.status_code = 200
        return resp

    @patch("time.sleep")
    def test_retry_on_429_with_retry_after(self, mock_sleep):
        """429 with Retry-After header: sleeps for the indicated duration, then succeeds."""
        client = self._make_client()
        answers = [self._make_429_response(retry_after="2"), self._make_200_response()]

        with patch.object(client.session, "get", side_effect=answers):
            result = client._get_with_retry(self._TEST_URL)

        assert result.status_code == 200
        mock_sleep.assert_called_once_with(2.0)

    @patch("time.sleep")
    def test_retry_on_429_without_retry_after(self, mock_sleep):
        """429 without Retry-After header: uses linear backoff (1.0s for first retry)."""
        client = self._make_client()
        answers = [self._make_429_response(), self._make_200_response()]

        with patch.object(client.session, "get", side_effect=answers):
            result = client._get_with_retry(self._TEST_URL)

        assert result.status_code == 200
        mock_sleep.assert_called_once_with(1.0)

    @patch("time.sleep")
    def test_retry_on_429_exhausted(self, mock_sleep):
        """3 consecutive 429 responses: raises HTTPError after exhausting retries."""
        client = self._make_client()
        always_429 = self._make_429_response()

        with patch.object(client.session, "get", return_value=always_429):
            with pytest.raises(requests.HTTPError):
                client._get_with_retry(self._TEST_URL)

        # One sleep between each pair of consecutive attempts.
        assert mock_sleep.call_count == 2

    @patch("time.sleep")
    def test_retry_on_429_then_timeout(self, mock_sleep):
        """429 followed by Timeout: both retry types handled in same loop."""
        client = self._make_client()
        outcomes = [
            self._make_429_response(),
            requests.Timeout("timeout"),
            self._make_200_response(),
        ]

        with patch.object(client.session, "get", side_effect=outcomes):
            result = client._get_with_retry(self._TEST_URL)

        assert result.status_code == 200
        assert mock_sleep.call_count == 2
class TestGetLatestCommit: class TestGetLatestCommit:
"""Test get_latest_commit method.""" """Test get_latest_commit method."""

View File

@@ -56,6 +56,43 @@ class TestReposToDicts:
assert field in d, f"Missing field: {field}" assert field in d, f"Missing field: {field}"
class TestSanitizeControlChars:
    """Check that control characters are stripped from exported text fields."""

    def test_export_json_sanitizes_control_chars(self):
        """Control chars (0x00, 0x01, 0x02) in a description are dropped from the JSON."""
        dirty = _make_repo(description="hello\x00\x01\x02world")

        exported = json.loads(export_json([dirty]))

        assert exported[0]["description"] == "helloworld"

    def test_export_json_preserves_newlines_tabs(self):
        """Newlines and tabs survive the export (they are valid JSON escapes)."""
        multiline = _make_repo(description="line1\nline2\ttab")

        exported = json.loads(export_json([multiline]))

        assert exported[0]["description"] == "line1\nline2\ttab"

    def test_export_json_unicode_safe(self):
        """A description with emojis and accents still yields valid JSON."""
        fancy = _make_repo(description="Projet avec accents : e, a et emojis 🚀🎉")

        exported = json.loads(export_json([fancy]))
        description = exported[0]["description"]

        assert "🚀" in description
        assert "accents" in description

    def test_sanitize_name_and_full_name(self):
        """Control chars in the name and full_name fields are sanitized too."""
        dirty = _make_repo(name="test\x00repo", full_name="admin/test\x01repo")

        first = repos_to_dicts([dirty])[0]

        assert first["name"] == "testrepo"
        assert first["full_name"] == "admin/testrepo"
class TestExportJson: class TestExportJson:
"""Test export_json function.""" """Test export_json function."""