Skip to content

Commit cb8880a

Browse files
authored
[25.06.07 / TASK-206] Feature - 사용자 모델 (유저네임, 썸네일) 추가, 대응 개발 (#31)
* modify: username 배치 대응 개발, 세부 업데이트 * feature: test code fixture 분리와 userinfo 업데이트 테스트 로직 추가 * modify: update_old_user_info 좀 더 파이써닉 (walrus operator) 하게 업데이트
1 parent ad84cf0 commit cb8880a

File tree

12 files changed

+454
-117
lines changed

12 files changed

+454
-117
lines changed

common/models.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import json
22
from dataclasses import dataclass
3-
from typing import Type, TypeVar
3+
from typing import Any, Type, TypeVar, no_type_check
44

55
from django.db import models
66

@@ -31,13 +31,16 @@ class Meta:
3131
# dataclass 베이스 mixin
3232
@dataclass
3333
class SerializableMixin:
34-
def to_dict(self) -> dict:
34+
@no_type_check
35+
def to_dict(self) -> dict[str, Any]:
3536
return to_dict(self)
3637

37-
def to_json_dict(self) -> dict:
38+
@no_type_check
39+
def to_json_dict(self) -> dict[str, Any]:
3840
"""Django Model의 JSON 필드 저장용"""
3941
return json.loads(json.dumps(self.to_dict()))
4042

4143
@classmethod
42-
def from_dict(cls: Type[T], data: dict) -> T:
44+
@no_type_check
45+
def from_dict(cls: Type[T], data: dict[str, Any]) -> T:
4346
return from_dict(cls, data)

scraping/apis.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
from typing import Any
23

34
from aiohttp.client import ClientSession
45
from aiohttp_retry import ExponentialRetry, RetryClient
@@ -27,7 +28,7 @@ async def fetch_velog_user_chk(
2728
session: ClientSession,
2829
access_token: str,
2930
refresh_token: str,
30-
) -> tuple[dict[str, str], dict[str, str]]:
31+
) -> tuple[dict[str, Any], dict[str, Any]]:
3132
# 토큰 유효성 검증
3233
payload = {"query": CURRENT_USER_QUERY}
3334
headers = get_header(access_token, refresh_token)

scraping/constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@
2121
id
2222
username
2323
email
24+
profile {
25+
thumbnail
26+
}
2427
}
2528
}
2629
"""

scraping/main.py

Lines changed: 87 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import asyncio
22
import logging
3+
from typing import Any
34

45
import aiohttp
56
import async_timeout
@@ -72,10 +73,61 @@ async def update_old_tokens(
7273
sentry_sdk.capture_exception(e)
7374
return False
7475

76+
async def update_old_user_info(
    self, user: User, user_data: dict[str, Any]
) -> bool:
    """Bring the stored profile fields of ``user`` in line with ``user_data``.

    ``user_data`` is read for the keys ``email``, ``username`` and
    ``profile`` -> ``thumbnail``. A field is written only when the incoming
    value is truthy and differs from what ``user`` currently holds, so an
    empty or missing incoming value never clears a stored one.

    Returns:
        True when nothing needed changing or the save succeeded; False when
        applying or saving the changes raised (the error is logged and
        reported to Sentry instead of being propagated).
    """
    profile = user_data.get("profile")

    # Incoming values keyed by the User attribute they map to. Insertion
    # order (email, username, thumbnail) drives the order of update_fields.
    incoming_by_field = {
        "email": user_data.get("email"),
        "username": user_data.get("username"),
        "thumbnail": profile.get("thumbnail") if profile else None,
    }

    # A truthy incoming value can never equal an empty stored value, so a
    # plain inequality check also covers the "stored value is empty" case.
    changed = {
        name: incoming
        for name, incoming in incoming_by_field.items()
        if incoming and getattr(user, name) != incoming
    }

    # Nothing to write: report success without touching the database.
    if not changed:
        return True

    changed_names = list(changed)
    try:
        for name, incoming in changed.items():
            setattr(user, name, incoming)

        # Persist only the columns that actually changed.
        await user.asave(update_fields=changed_names)

        logger.info(
            "Updated user profile fields %s for %s",
            changed_names,
            user.velog_uuid,
        )
        return True

    except Exception as e:
        logger.error(
            "Failed to update user info: %s (user velog uuid: %s)",
            e,
            user.velog_uuid,
        )
        sentry_sdk.capture_exception(e)
        return False
126+
75127
async def bulk_upsert_posts(
76128
self,
77129
user: User,
78-
fetched_posts: list[dict[str, str]],
130+
fetched_posts: list[dict[str, Any]],
79131
batch_size: int = 200,
80132
) -> bool:
81133
"""Post 객체를 일정 크기의 배치로 나눠서 삽입 또는 업데이트"""
@@ -95,7 +147,7 @@ async def bulk_upsert_posts(
95147
return False
96148

97149
async def _upsert_batch(
98-
self, user: User, batch_posts: list[dict[str, str]]
150+
self, user: User, batch_posts: list[dict[str, Any]]
99151
) -> None:
100152
"""단일 배치 처리, bulk_upsert_posts 에서 호출됨"""
101153

@@ -209,7 +261,7 @@ def _execute_sync() -> None:
209261
await _execute_sync()
210262

211263
async def update_daily_statistics(
212-
self, post: dict[str, str], stats: dict[str, str]
264+
self, post: dict[str, Any], stats: dict[str, Any]
213265
) -> None:
214266
"""PostDailyStatistics를 업데이트 또는 생성 (upsert)"""
215267
if not stats or not isinstance(stats, dict):
@@ -222,17 +274,17 @@ async def update_daily_statistics(
222274
today = get_local_now().date()
223275
post_id = post["id"]
224276

225-
stats_data = stats.get("data", {}) # type: ignore
277+
stats_data = stats.get("data", {})
226278
if not stats_data or not isinstance(
227-
stats_data.get("getStats"), # type: ignore
279+
stats_data.get("getStats"),
228280
dict,
229281
):
230282
logger.warning(
231283
f"Skip updating statistics due to missing getStats data for post {post_id}"
232284
)
233285
return
234286

235-
view_count = stats_data["getStats"].get("total", 0) # type: ignore
287+
view_count = stats_data["getStats"].get("total", 0)
236288
like_count = post.get("likes", 0)
237289

238290
# 트랜잭션 내에서 실행
@@ -331,29 +383,50 @@ async def process_user(
331383
origin_access_token = aes_encryption.decrypt(user.access_token)
332384
origin_refresh_token = aes_encryption.decrypt(user.refresh_token)
333385

386+
# ========================================================== #
387+
# STEP1: 토큰이 유효성 체크 및 업데이트. 이후 사용자 정보 업데이트
388+
# ========================================================== #
389+
334390
# 토큰 유효성 검증
335391
new_user_cookies, user_data = await fetch_velog_user_chk(
336392
session,
337393
origin_access_token,
338394
origin_refresh_token,
339395
)
340-
if not (user_data or new_user_cookies):
341-
return
342396

343-
if user_data["data"]["currentUser"] is None: # type: ignore
397+
if (
398+
not (user_data or new_user_cookies)
399+
or user_data.get("data", {}).get("currentUser") is None
400+
):
344401
logger.warning(
345402
f"Failed to fetch user data because of wrong tokens. (user velog uuid: {user.velog_uuid})"
346403
)
347404
return
348405

349406
if new_user_cookies:
350-
await self.update_old_tokens(
407+
user_token_result = await self.update_old_tokens(
351408
user,
352409
aes_encryption,
353410
new_user_cookies,
354411
)
412+
if not user_token_result:
413+
raise Exception("Failed to update tokens, Check the logs")
414+
origin_access_token = new_user_cookies["access_token"]
415+
origin_refresh_token = new_user_cookies["refresh_token"]
416+
417+
# velog 응답과 기존 저장된 사용자 정보 비교 및 업데이트
418+
# user_data -> currentUser 에는 id / username / email / profile { thumbnail } 존재
419+
user_info_result = await self.update_old_user_info(
420+
user,
421+
user_data["data"]["currentUser"],
422+
)
423+
if not user_info_result:
424+
raise Exception("Failed to update user_info, Check the logs")
355425

356-
username = user_data["data"]["currentUser"]["username"] # type: ignore
426+
# ========================================================== #
427+
# STEP2: 게시물 전체 목록을 가져와서 upsert 와 상태 동기화 (비활성, 활성)
428+
# ========================================================== #
429+
username = user_data["data"]["currentUser"]["username"]
357430
fetched_posts = await fetch_all_velog_posts(
358431
session, username, origin_access_token, origin_refresh_token
359432
)
@@ -370,6 +443,9 @@ async def process_user(
370443
user, all_post_ids, min_posts_threshold=1
371444
)
372445

446+
# ========================================================== #
447+
# STEP3: 게시물 전체 목록을 기반으로 세부 통계 가져와서 upsert
448+
# ========================================================== #
373449
# 게시물을 적절한 크기의 청크로 나누어 처리
374450
chunk_size = 20
375451
for i in range(0, len(fetched_posts), chunk_size):

scraping/tests/conftest.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import uuid
2+
3+
import pytest
4+
5+
from scraping.main import Scraper
6+
from users.models import User
7+
8+
9+
@pytest.fixture
def scraper():
    """Provide a Scraper built with group_range=range(1, 10) and max_connections=10."""
    target_groups = range(1, 10)
    return Scraper(group_range=target_groups, max_connections=10)
13+
14+
15+
@pytest.fixture
def user(db):
    """Create and return a test User row with email, username and thumbnail set.

    NOTE(review): ``db`` is presumably the pytest-django database fixture,
    requested only to enable DB access - confirm.
    """
    attrs = {
        "velog_uuid": uuid.uuid4(),
        "access_token": "encrypted-access-token",
        "refresh_token": "encrypted-refresh-token",
        "group_id": 1,
        "email": "test@example.com",
        "username": "nuung",
        "thumbnail": "https://nuung.com",
        "is_active": True,
    }
    return User.objects.create(**attrs)
28+
29+
30+
@pytest.fixture
def mock_user_data():
    """Return a sample ``user_data`` payload nested as data -> currentUser."""
    current_user = {
        "id": "user-123",
        "email": "test@example.com",
        "username": "testuser",
        "profile": {"thumbnail": "https://example.com/thumbnail.jpg"},
    }
    return {"data": {"currentUser": current_user}}
43+
44+
45+
@pytest.fixture
def mock_new_tokens():
    """Return a sample pair of replacement access/refresh tokens."""
    return dict(
        access_token="new-access-token",
        refresh_token="new-refresh-token",
    )
52+
53+
54+
@pytest.fixture
def mock_posts_data():
    """Return two sample post dicts, each with a freshly generated UUID string id."""
    # (title, url_slug, released_at, likes) for each sample post
    samples = (
        ("Test Post 1", "test-post-1", "2024-01-01T00:00:00Z", 15),
        ("Test Post 2", "test-post-2", "2024-01-02T00:00:00Z", 25),
    )
    return [
        {
            "id": str(uuid.uuid4()),
            "title": title,
            "url_slug": url_slug,
            "released_at": released_at,
            "likes": likes,
        }
        for title, url_slug, released_at, likes in samples
    ]
73+
74+
75+
@pytest.fixture
def mock_stats_data():
    """Return a sample stats payload nested as data -> getStats -> total."""
    get_stats = {"total": 150}
    return {"data": {"getStats": get_stats}}

scraping/tests/test_main_posts.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,29 +5,11 @@
55
from asgiref.sync import sync_to_async
66

77
from posts.models import Post
8-
from scraping.main import Scraper
98
from users.models import User
109
from utils.utils import get_local_now
1110

1211

1312
class TestScraperPosts:
14-
@pytest.fixture
15-
def scraper(self):
16-
"""Scraper 인스턴스 생성"""
17-
return Scraper(group_range=range(1, 10), max_connections=10)
18-
19-
@pytest.fixture
20-
def user(self, db):
21-
"""테스트용 User 객체 생성"""
22-
return User.objects.create(
23-
velog_uuid=uuid.uuid4(),
24-
access_token="encrypted-access-token",
25-
refresh_token="encrypted-refresh-token",
26-
group_id=1,
27-
email="test@example.com",
28-
is_active=True,
29-
)
30-
3113
@pytest.mark.asyncio
3214
async def test_bulk_upsert_posts_success(self, scraper, user):
3315
"""Post 객체 배치 분할 삽입 또는 업데이트 성공 테스트"""

scraping/tests/test_main_statistics.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,29 +5,11 @@
55
from asgiref.sync import sync_to_async
66

77
from posts.models import Post, PostDailyStatistics
8-
from scraping.main import Scraper
98
from users.models import User
109
from utils.utils import get_local_now
1110

1211

1312
class TestScraperStatistics:
14-
@pytest.fixture
15-
def scraper(self):
16-
"""Scraper 인스턴스 생성"""
17-
return Scraper(group_range=range(1, 10), max_connections=10)
18-
19-
@pytest.fixture
20-
def user(self, db):
21-
"""테스트용 User 객체 생성"""
22-
return User.objects.create(
23-
velog_uuid=uuid.uuid4(),
24-
access_token="encrypted-access-token",
25-
refresh_token="encrypted-refresh-token",
26-
group_id=1,
27-
email="test@example.com",
28-
is_active=True,
29-
)
30-
3113
@pytest.mark.asyncio
3214
async def test_update_daily_statistics_success(self, scraper):
3315
"""데일리 통계 업데이트 또는 생성 성공 테스트"""

0 commit comments

Comments
 (0)