-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmain.py
More file actions
189 lines (162 loc) · 7.34 KB
/
main.py
File metadata and controls
189 lines (162 loc) · 7.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
import asyncio
import os
import re
from urllib.parse import quote_plus

import aiohttp
import feedparser
import psutil
from bs4 import BeautifulSoup
from pymongo import MongoClient
from pyrogram import Client, filters, __version__ as pyrogram_version
from pyrogram.enums import ParseMode
from pyrogram.types import InlineKeyboardButton, InlineKeyboardMarkup

from config import (
    API_ID,
    API_HASH,
    BOT_TOKEN,
    MONGO_URI,
    DB_NAME,
    COLLECTION_NAME,
    CHANNEL_USERNAME
)
# TechCrunch RSS feed URL polled by the bot.
RSS_FEED_URL = "https://techcrunch.com/feed/"
# Initialize the Pyrogram bot client (10 worker threads for update handlers).
app = Client("TechCrunchBot", api_id=API_ID, api_hash=API_HASH, bot_token=BOT_TOKEN, workers=10)
# MongoDB connection: the collection stores one document per posted article,
# keyed by "link", and is used to de-duplicate across fetches.
mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]
# Directory where article images are saved before being uploaded to Telegram.
IMAGE_DIR = "images"
os.makedirs(IMAGE_DIR, exist_ok=True)
# Async function to fetch and parse the RSS feed
async def fetch_rss():
    """Fetch the TechCrunch RSS feed and return its parsed entries.

    Returns:
        list: feedparser entries on success; an empty list on a non-200
        response or any network-level failure.
    """
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(RSS_FEED_URL) as response:
                if response.status == 200:
                    # feedparser accepts the raw XML text directly.
                    feed = feedparser.parse(await response.text())
                    return feed.entries
                print(f"Failed to fetch RSS feed. Status code: {response.status}")
    except aiohttp.ClientError as e:
        # BUG FIX: DNS failures, timeouts and connection resets previously
        # propagated out of this coroutine and killed the periodic loop;
        # treat them the same as an HTTP error and return no entries.
        print(f"Error fetching RSS feed: {e}")
    return []
# Async function to scrape details from the article page
async def scrape_article(link):
    """Scrape an article page for its preview image and a details snippet.

    Args:
        link: URL of the article page to scrape.

    Returns:
        tuple: (image_url, more_details). image_url is the og:image URL or
        None; more_details is up to 500 characters of the article body, or a
        placeholder string when unavailable. A 2-tuple is returned on EVERY
        path so the caller's unpacking never fails.
    """
    fallback = (None, "No additional details available.")
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(link) as response:
                if response.status != 200:
                    # BUG FIX: a non-200 response previously fell through and
                    # could return bare None, breaking tuple unpacking in
                    # fetch_and_process_news.
                    return fallback
                soup = BeautifulSoup(await response.text(), "html.parser")
                # Image URL comes from the Open Graph meta tag.
                image_tag = soup.find("meta", property="og:image")
                image_url = image_tag["content"] if image_tag and image_tag.get("content") else None
                # First 500 characters of the article body, if present.
                detail_section = soup.find("div", class_="article-content")
                if detail_section:
                    more_details = detail_section.text.strip()[:500] + "..."
                else:
                    more_details = "No additional details available."
                return image_url, more_details
    except Exception as e:
        print(f"Error scraping article {link}: {e}")
    return fallback
# Async function to download an image from a URL
async def download_image(image_url, filename):
    """Download *image_url* into *filename*.

    Returns the file path on success, or None when the request fails or
    returns a non-200 status.
    """
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(image_url) as response:
                if response.status == 200:
                    payload = await response.read()
                    with open(filename, "wb") as handle:
                        handle.write(payload)
                    return filename
    except Exception as exc:
        print(f"Error downloading image: {exc}")
    return None
# Async function to fetch and process news
async def fetch_and_process_news(force_post=False):
    """Fetch RSS entries, scrape details for relevant ones, and record them.

    Args:
        force_post: when True (used on the first run), every feed entry is
            returned for posting even if its link is already in the database.

    Returns:
        list: article dicts with title/link/summary/image_url/more_details.
    """
    news_list = await fetch_rss()
    new_articles = []
    for entry in news_list:
        already_seen = collection.find_one({"link": entry.link}) is not None
        # On a forced run post everything; otherwise only unseen links.
        if force_post or not already_seen:
            image_url, more_details = await scrape_article(entry.link)
            article = {
                "title": entry.title,
                "link": entry.link,
                "summary": entry.summary,
                "image_url": image_url,
                "more_details": more_details
            }
            new_articles.append(article)
            # BUG FIX: only insert links not already stored -- the old code
            # re-inserted every article on force_post runs, creating duplicate
            # documents in the collection.
            if not already_seen:
                collection.insert_one(article)
    return new_articles
# Async function to post articles to Telegram
async def post_articles(articles):
    """Post each article to the configured channel with image and buttons.

    Args:
        articles: list of article dicts produced by fetch_and_process_news.
    """
    for article in articles:
        # Download the image, if the article has one.
        image_path = None
        if article["image_url"]:
            # BUG FIX: titles can contain characters invalid in file names
            # (e.g. "/" or "?") which made open() fail; keep only safe chars.
            safe_title = re.sub(r"[^\w\-]+", "_", article["title"]).strip("_")
            image_filename = os.path.join(IMAGE_DIR, f"{safe_title}.jpg")
            image_path = await download_image(article["image_url"], image_filename)
        # Prepare the caption.
        caption = (
            f"📌 **{article['title']}**\n\n"
            f"📖 Summary: {article['summary']}\n"
            f"🔍 More Details: {article['more_details']}"
        )
        # URL for the native Telegram share screen. BUG FIX: the article link
        # is percent-encoded so query characters in it don't break the URL.
        share_url = (
            "https://t.me/share/url?url="
            + quote_plus(article["link"])
            + "&text=Check@TheSmartDevForMoreInfo"
        )
        # Custom inline buttons under each post.
        keyboard = InlineKeyboardMarkup(
            [
                [InlineKeyboardButton("♻️ Refresh ♻️", callback_data="refresh_news")],
                [InlineKeyboardButton("🔗 Read More 🔗", url=article['link'])],
                [InlineKeyboardButton("✉️ Join Always Free Tech News ✉️", url="https://t.me/abir_x_official_free_course")],
                [InlineKeyboardButton("🤝 Share 🤝", url=share_url)]
            ]
        )
        # Send the image with caption, or just the caption if no image.
        if image_path and os.path.exists(image_path):
            # Telegram rejects photo captions longer than 1024 characters.
            await app.send_photo(CHANNEL_USERNAME, photo=image_path, caption=caption[:1024], reply_markup=keyboard, parse_mode=ParseMode.MARKDOWN)
            os.remove(image_path)  # Clean up the downloaded image
        else:
            await app.send_message(CHANNEL_USERNAME, caption, reply_markup=keyboard, parse_mode=ParseMode.MARKDOWN)
        await asyncio.sleep(5)  # Avoid hitting Telegram API limits
# Background task to fetch and post news every 40 minutes
async def periodic_news_fetch():
    """Background loop: post the whole feed once, then poll every 40 minutes."""
    # On the very first pass, publish every entry regardless of history.
    print("First run: Fetching and posting all articles...")
    initial_batch = await fetch_and_process_news(force_post=True)
    if initial_batch:
        await post_articles(initial_batch)
    else:
        print("No articles found during the first run.")
    # From here on, only previously unseen articles are posted.
    while True:
        print("Fetching and posting new articles...")
        fresh = await fetch_and_process_news()
        if fresh:
            await post_articles(fresh)
        else:
            print("No new articles found.")
        await asyncio.sleep(2400)  # Wait 40 minutes
# Callback query handler for the "Refresh" button
@app.on_callback_query(filters.regex("refresh_news"))
async def refresh_news_callback(client, callback_query):
    """Handle the ♻️ Refresh button: fetch and post any unseen articles.

    Always answers the callback query so the client's loading spinner stops.
    """
    print("Refresh button clicked. Fetching news...")
    new_articles = await fetch_and_process_news()
    if new_articles:
        # BUG FIX: the query was previously left unanswered on this branch,
        # leaving the user's client showing a spinner until timeout.
        await callback_query.answer("Posting fresh news!")
        await post_articles(new_articles)
    else:
        await callback_query.answer("No New News Bruh!", show_alert=True)
# /start command handler
@app.on_message(filters.command("start"))
async def start(client, message):
    """Reply to /start with a short greeting."""
    greeting = "🤖 Hello! I'm the TechCrunch News Bot. I'll post the latest tech news to the channel!"
    await message.reply(greeting)
if __name__ == "__main__":
    # Print a startup banner with the Pyrogram version and resource stats.
    process = psutil.Process()
    memory_info = process.memory_info()
    # NOTE(review): cpu_percent(interval=1) blocks startup for one second.
    cpu_percent = psutil.cpu_percent(interval=1)
    print(f"Bot Is Up And Running On Pyrogram {pyrogram_version}")
    print(f"Memory Usage: {memory_info.rss / 1024 ** 2:.2f} MB")
    print(f"CPU Usage: {cpu_percent}%")
    # Schedule the news loop on the event loop app.run() will drive.
    # NOTE(review): asyncio.get_event_loop() outside a running loop is
    # deprecated on Python 3.10+ -- confirm the target interpreter/pyrogram
    # versions before changing this wiring.
    loop = asyncio.get_event_loop()
    loop.create_task(periodic_news_fetch())  # Start the background task
    app.run()