mamba413 commited on
Commit
33df8f4
·
verified ·
1 Parent(s): 28c4ec8

Upload stats.py

Browse files
Files changed (1) hide show
  1. src/stats.py +107 -0
src/stats.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import threading
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from typing import Optional
7
+ from huggingface_hub import HfApi, upload_file, hf_hub_download
8
+
9
+
10
class StatsManager:
    """Persist simple usage counters (visits, detections) as JSON.

    Counters are always written to a local backup file immediately; uploads to
    a Hugging Face dataset repo are best-effort and throttled to at most one
    every ``WRITE_INTERVAL_SECONDS`` so frequent increments don't spam commits.
    Counter updates are serialized with a lock, so they are thread-safe.
    """

    STATS_FILENAME = "stats.json"
    WRITE_INTERVAL_SECONDS = 60  # minimum seconds between HF uploads

    def __init__(self, dataset_repo_id: str, hf_token: Optional[str] = None, local_backup: bool = True):
        """Initialize storage locations and load any previously saved stats.

        Args:
            dataset_repo_id: HF dataset repo to sync with (empty disables sync).
            hf_token: HF auth token; falls back to the ``HF_TOKEN`` env var.
            local_backup: kept for API compatibility; the local file is always
                written regardless (presumably a vestigial flag — see callers).
        """
        self.dataset_repo_id = dataset_repo_id
        self.hf_token = hf_token or os.environ.get("HF_TOKEN")
        self.local_backup = local_backup
        self._lock = threading.Lock()

        # Remote sync is only possible with both a repo id and a token.
        self.api = HfApi(token=self.hf_token) if (self.dataset_repo_id and self.hf_token) else None

        # On HF Spaces (SPACE_ID set) only /tmp is reliably writable;
        # elsewhere keep the data next to the source file.
        if os.environ.get("SPACE_ID"):
            self.local_dir = Path("/tmp/feedback_data")
        else:
            self.local_dir = Path(__file__).parent / "feedback_data"
        self.local_dir.mkdir(exist_ok=True, parents=True)
        self.local_file = self.local_dir / self.STATS_FILENAME

        self._stats = self._load_stats()
        # datetime.min makes the first increment trigger an immediate upload.
        self._last_hf_write = datetime.min

    @staticmethod
    def _default_stats() -> dict:
        """Return a fresh stats dict with every expected key present."""
        now = datetime.now().isoformat()
        return {"visit_count": 0, "detection_count": 0, "last_updated": now, "created_at": now}

    def _load_stats(self) -> dict:
        """Load stats, preferring the HF dataset copy over the local backup.

        Whatever is loaded is merged over the defaults, so a stats file that
        is missing keys (older schema, partial write) can never make the
        increment methods raise ``KeyError``.
        """
        loaded = None

        if self.api and self.dataset_repo_id:
            try:
                local_path = hf_hub_download(
                    repo_id=self.dataset_repo_id,
                    filename=self.STATS_FILENAME,
                    repo_type="dataset",
                    token=self.hf_token,
                    force_download=True,
                    cache_dir=str(self.local_dir)
                )
                with open(local_path, "r", encoding="utf-8") as f:
                    loaded = json.load(f)
            except Exception as e:
                # A 404 just means the file doesn't exist yet; anything else
                # is worth surfacing, but never fatal (local backup remains).
                if "404" not in str(e):
                    print(f"[StatsManager] HF load warning: {e}")

        if loaded is None and self.local_file.exists():
            try:
                with open(self.local_file, "r", encoding="utf-8") as f:
                    loaded = json.load(f)
            except Exception as e:
                print(f"[StatsManager] Local load error: {e}")

        stats = self._default_stats()
        if isinstance(loaded, dict):
            # Fix: backfill missing keys instead of returning the raw dict,
            # which would KeyError later in increment_visit/_upload_to_hf.
            stats.update(loaded)
        return stats

    def _write_to_local(self):
        """Best-effort write of the current stats to the local backup file."""
        try:
            with open(self.local_file, "w", encoding="utf-8") as f:
                json.dump(self._stats, f, indent=2)
        except Exception as e:
            print(f"[StatsManager] Local write error: {e}")

    def _upload_to_hf(self):
        """Best-effort upload of the local stats file to the dataset repo."""
        if not (self.api and self.dataset_repo_id):
            return
        try:
            visits = self._stats["visit_count"]
            detections = self._stats["detection_count"]
            upload_file(
                path_or_fileobj=str(self.local_file),
                path_in_repo=self.STATS_FILENAME,
                repo_id=self.dataset_repo_id,
                repo_type="dataset",
                token=self.hf_token,
                commit_message=f"stats: visits={visits} detections={detections}"
            )
        except Exception as e:
            # Uploads are opportunistic; the local file is the source of truth.
            print(f"[StatsManager] HF upload error (non-fatal): {e}")

    def _maybe_flush(self):
        """Persist locally; upload to HF only if the throttle window elapsed.

        Must be called while holding ``self._lock``.
        """
        self._stats["last_updated"] = datetime.now().isoformat()
        self._write_to_local()
        elapsed = (datetime.now() - self._last_hf_write).total_seconds()
        if elapsed >= self.WRITE_INTERVAL_SECONDS:
            self._upload_to_hf()
            self._last_hf_write = datetime.now()

    def increment_visit(self):
        """Thread-safely bump the visit counter and persist."""
        with self._lock:
            self._stats["visit_count"] += 1
            self._maybe_flush()

    def increment_detection(self):
        """Thread-safely bump the detection counter and persist."""
        with self._lock:
            self._stats["detection_count"] += 1
            self._maybe_flush()

    @property
    def visit_count(self) -> int:
        # Plain int read; atomic enough under the GIL, no lock needed.
        return self._stats.get("visit_count", 0)

    @property
    def detection_count(self) -> int:
        return self._stats.get("detection_count", 0)