fix(client,exporter): handle HTTP 429 retry and sanitize JSON
_get_with_retry now retries on HTTP 429 responses, respecting the Retry-After header when present. The exporter sanitizes control characters (0x00-0x1F except \n \r \t) in text fields before JSON serialization.

Fixes #11
Fixes #12

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -33,19 +33,42 @@ class GiteaClient:
|
|||||||
self.session.headers["Authorization"] = f"token {token}"
|
self.session.headers["Authorization"] = f"token {token}"
|
||||||
|
|
||||||
def _get_with_retry(self, url: str, params: dict | None = None) -> requests.Response:
    """GET with automatic retry on timeout AND on rate limiting (HTTP 429).

    Makes up to ``_MAX_RETRIES + 1`` attempts. Between attempts it waits
    using a linear backoff of ``_RETRY_DELAY * (attempt + 1)``.

    When the response is 429 (Too Many Requests), the ``Retry-After``
    header is honoured if it can be interpreted, either as a number of
    seconds or as an HTTP-date. If the header is absent or unusable, the
    linear backoff is used instead of crashing on the malformed value.

    Any response whose status is not 429 is returned as-is (no status
    check is performed here).

    Raises:
        requests.Timeout: when retries are exhausted and the most recent
            attempt timed out.
        requests.HTTPError: when retries are exhausted and the most recent
            attempt was answered with HTTP 429.
    """
    last_exc: requests.Timeout | None = None
    last_resp: requests.Response | None = None
    for attempt in range(self._MAX_RETRIES + 1):
        try:
            resp = self.session.get(url, params=params, timeout=self.timeout)
        except requests.Timeout as exc:
            # Keep only the most recent failure: an earlier 429 must not
            # mask the timeout that actually ended the retry sequence.
            last_exc, last_resp = exc, None
        else:
            if resp.status_code != 429:
                return resp
            last_resp, last_exc = resp, None

        if attempt >= self._MAX_RETRIES:
            # No attempt left: do not sleep, report the failure below.
            break

        delay = self._RETRY_DELAY * (attempt + 1)
        if last_resp is not None:
            delay = self._retry_after_delay(last_resp.headers.get("Retry-After"), delay)
        time.sleep(delay)

    if last_exc is not None:
        raise last_exc
    if last_resp is not None:
        last_resp.raise_for_status()
    # Only reachable if no attempt was made at all (e.g. negative
    # _MAX_RETRIES) or if raise_for_status() did not raise.
    raise RuntimeError(f"GET {url} failed without a recorded error")

@staticmethod
def _retry_after_delay(header_value: str | None, fallback: float) -> float:
    """Convert a ``Retry-After`` header value into a sleep duration in seconds.

    Accepts both forms allowed for the header: a number of seconds or an
    HTTP-date. Returns ``fallback`` when the header is missing, cannot be
    parsed, or is not a finite non-negative number, so the result is always
    safe to pass to ``time.sleep``. A date in the past yields ``0.0``.
    """
    import math
    from datetime import datetime, timezone
    from email.utils import parsedate_to_datetime

    if header_value is None:
        return fallback
    text = str(header_value).strip()
    try:
        seconds = float(text)
    except ValueError:
        # Not a number: try the HTTP-date form.
        try:
            when = parsedate_to_datetime(text)
        except (TypeError, ValueError):
            return fallback
        if when.tzinfo is None:
            # A date without zone information is interpreted as UTC.
            when = when.replace(tzinfo=timezone.utc)
        return max((when - datetime.now(timezone.utc)).total_seconds(), 0.0)
    if not math.isfinite(seconds) or seconds < 0:
        return fallback
    return seconds
|
||||||
|
|
||||||
def _get_paginated(self, endpoint: str, params: dict | None = None) -> list[dict]:
|
def _get_paginated(self, endpoint: str, params: dict | None = None) -> list[dict]:
|
||||||
|
|||||||
@@ -3,18 +3,38 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import re
|
||||||
from dataclasses import asdict
|
from dataclasses import asdict
|
||||||
|
|
||||||
from gitea_dashboard.collector import RepoData
|
from gitea_dashboard.collector import RepoData
|
||||||
|
|
||||||
|
# Caracteres de controle ASCII (0x00-0x1F) sauf \t (0x09), \n (0x0A), \r (0x0D)
|
||||||
|
_CONTROL_CHAR_RE = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]")
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize_control_chars(text: str) -> str:
|
||||||
|
"""Supprime les caracteres de controle ASCII (0x00-0x1F) sauf \\n, \\r et \\t.
|
||||||
|
|
||||||
|
Ces caracteres peuvent provenir de descriptions de repos Gitea
|
||||||
|
et causent des erreurs JSON ('Invalid control character').
|
||||||
|
"""
|
||||||
|
return _CONTROL_CHAR_RE.sub("", text)
|
||||||
|
|
||||||
|
|
||||||
def repos_to_dicts(repos: list[RepoData]) -> list[dict]:
    """Convert a list of RepoData into a list of serializable dicts.

    Each RepoData is turned into a dict with ``dataclasses.asdict``. The
    top-level text fields ``name``, ``full_name`` and ``description`` are
    then passed through ``_sanitize_control_chars`` when they hold a
    string; any other value (including a missing key) is left untouched.
    """
    text_fields = ("name", "full_name", "description")

    def _cleaned(record: dict) -> dict:
        # Sanitize in place, only where the field actually holds a string.
        for key in text_fields:
            value = record.get(key)
            if isinstance(value, str):
                record[key] = _sanitize_control_chars(value)
        return record

    return [_cleaned(asdict(repo)) for repo in repos]
|
||||||
|
|
||||||
|
|
||||||
def export_json(repos: list[RepoData], indent: int = 2) -> str:
|
def export_json(repos: list[RepoData], indent: int = 2) -> str:
|
||||||
|
|||||||
@@ -210,6 +210,83 @@ class TestGetWithRetry:
|
|||||||
mock_sleep.assert_any_call(2.0)
|
mock_sleep.assert_any_call(2.0)
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetWithRetry429:
    """Exercise the HTTP 429 (rate limiting) retry path of _get_with_retry."""

    def _make_client(self):
        # Fresh client per test so patched session state never leaks.
        return GiteaClient("http://gitea.local:3000", "tok")

    def _make_429_response(self, retry_after=None):
        """Build a mock 429 response, optionally carrying a Retry-After header."""
        headers = {}
        if retry_after is not None:
            headers["Retry-After"] = retry_after

        throttled = MagicMock()
        throttled.status_code = 429
        throttled.headers = headers
        throttled.raise_for_status.side_effect = requests.HTTPError(
            "429 Too Many Requests", response=throttled
        )
        return throttled

    def _make_200_response(self):
        """Build a mock successful response."""
        ok = MagicMock()
        ok.status_code = 200
        return ok

    @patch("time.sleep")
    def test_retry_on_429_with_retry_after(self, mock_sleep):
        """A 429 carrying Retry-After sleeps for that duration, then succeeds."""
        client = self._make_client()
        answers = [self._make_429_response(retry_after="2"), self._make_200_response()]

        with patch.object(client.session, "get", side_effect=answers):
            result = client._get_with_retry("http://gitea.local:3000/api/v1/test")

        assert result.status_code == 200
        mock_sleep.assert_called_once_with(2.0)

    @patch("time.sleep")
    def test_retry_on_429_without_retry_after(self, mock_sleep):
        """A 429 without Retry-After uses linear backoff (1.0s for the first retry)."""
        client = self._make_client()
        answers = [self._make_429_response(), self._make_200_response()]

        with patch.object(client.session, "get", side_effect=answers):
            result = client._get_with_retry("http://gitea.local:3000/api/v1/test")

        assert result.status_code == 200
        mock_sleep.assert_called_once_with(1.0)

    @patch("time.sleep")
    def test_retry_on_429_exhausted(self, mock_sleep):
        """Only 429 responses: HTTPError is raised once retries are exhausted."""
        client = self._make_client()
        always_throttled = self._make_429_response()

        with (
            patch.object(client.session, "get", return_value=always_throttled),
            pytest.raises(requests.HTTPError),
        ):
            client._get_with_retry("http://gitea.local:3000/api/v1/test")

        assert mock_sleep.call_count == 2

    @patch("time.sleep")
    def test_retry_on_429_then_timeout(self, mock_sleep):
        """A 429 followed by a Timeout: both retry kinds share the same loop."""
        client = self._make_client()
        outcomes = [
            self._make_429_response(),
            requests.Timeout("timeout"),
            self._make_200_response(),
        ]

        with patch.object(client.session, "get", side_effect=outcomes):
            result = client._get_with_retry("http://gitea.local:3000/api/v1/test")

        assert result.status_code == 200
        assert mock_sleep.call_count == 2
|
||||||
|
|
||||||
|
|
||||||
class TestGetLatestCommit:
|
class TestGetLatestCommit:
|
||||||
"""Test get_latest_commit method."""
|
"""Test get_latest_commit method."""
|
||||||
|
|
||||||
|
|||||||
@@ -56,6 +56,43 @@ class TestReposToDicts:
|
|||||||
assert field in d, f"Missing field: {field}"
|
assert field in d, f"Missing field: {field}"
|
||||||
|
|
||||||
|
|
||||||
|
class TestSanitizeControlChars:
    """Check that control characters are stripped from exported text fields."""

    def test_export_json_sanitizes_control_chars(self):
        """Control chars (0x00, 0x01, 0x02) in a description are dropped from the JSON."""
        exported = export_json([_make_repo(description="hello\x00\x01\x02world")])

        first = json.loads(exported)[0]
        assert first["description"] == "helloworld"

    def test_export_json_preserves_newlines_tabs(self):
        """Newlines and tabs survive the export (they are valid JSON escapes)."""
        exported = export_json([_make_repo(description="line1\nline2\ttab")])

        first = json.loads(exported)[0]
        assert first["description"] == "line1\nline2\ttab"

    def test_export_json_unicode_safe(self):
        """A description with emojis and accents still produces valid JSON."""
        exported = export_json(
            [_make_repo(description="Projet avec accents : e, a et emojis 🚀🎉")]
        )

        description = json.loads(exported)[0]["description"]
        assert "🚀" in description
        assert "accents" in description

    def test_sanitize_name_and_full_name(self):
        """Control chars in the name and full_name fields are sanitized as well."""
        dirty = _make_repo(name="test\x00repo", full_name="admin/test\x01repo")

        (converted,) = repos_to_dicts([dirty])

        assert converted["name"] == "testrepo"
        assert converted["full_name"] == "admin/testrepo"
|
||||||
|
|
||||||
|
|
||||||
class TestExportJson:
|
class TestExportJson:
|
||||||
"""Test export_json function."""
|
"""Test export_json function."""
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user