Skip to content

Commit 886ba17

Browse files
authored
[PGS] 17677 [1차] 뉴스 클러스터링 (Lv.2)
1 parent ee3c281 commit 886ba17

1 file changed

Lines changed: 45 additions & 0 deletions

File tree

박예진/6주차/260202.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
from collections import Counter
2+
3+
def is_alpha(c):
    """Return True if *c* is a single ASCII letter (a-z or A-Z).

    Non-ASCII letters, digits, whitespace and punctuation all yield False.

    Args:
        c: The string to test; expected to be one character long.

    Returns:
        True when ``c`` is exactly one character in ``a``-``z`` or
        ``A``-``Z``, otherwise False.
    """
    # The range checks below compare strings lexicographically, so without the
    # length guard a longer string such as "a1" would be accepted as a letter.
    return len(c) == 1 and ('a' <= c <= 'z' or 'A' <= c <= 'Z')
6+
7+
def make_multiset(s):
    """Split *s* into its lowercase two-letter pairs.

    Every pair of adjacent characters in ``s`` is examined. A pair is kept
    only when both characters pass ``is_alpha``; kept pairs are lowercased.
    Repeated pairs are kept as separate entries, so the returned list can be
    counted as a multiset.

    Args:
        s: The input string.

    Returns:
        A list of two-character lowercase strings in order of appearance.
        The list is empty when ``s`` has fewer than two characters or
        contains no pair of adjacent letters.
    """
    # zip(s, s[1:]) walks each character together with its right neighbour.
    return [
        (first + second).lower()
        for first, second in zip(s, s[1:])
        if is_alpha(first) and is_alpha(second)
    ]
15+
16+
def solution(str1, str2):
    """Return the Jaccard similarity of two strings, scaled by 65536.

    Each string is converted to a multiset of two-letter pairs by
    ``make_multiset``. The similarity is the size of the multiset
    intersection divided by the size of the multiset union; the result is
    multiplied by 65536 and truncated to an integer.

    Args:
        str1: The first input string.
        str2: The second input string.

    Returns:
        The scaled similarity as an int. When both multisets are empty the
        function returns 65536.
    """
    counts1 = Counter(make_multiset(str1))
    counts2 = Counter(make_multiset(str2))

    # Counter's `&` keeps the minimum count per key (multiset intersection)
    # and `|` keeps the maximum count per key (multiset union).
    intersection = sum((counts1 & counts2).values())
    union = sum((counts1 | counts2).values())

    # Neither string produced a pair: avoid dividing by zero and report the
    # maximum score.
    if union == 0:
        return 65536

    # Integer floor division gives the truncated result without going
    # through floating point.
    return intersection * 65536 // union

0 commit comments

Comments
 (0)