Job-Scraper-Analyzer-basic/analyzer.py at main · ramshawork/Job-Scraper-Analyzer-basic · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99

import pandas as pd
from collections import Counter


# Load the CSV file

def load_data(filename='jobs_data.csv'):
    """Read a CSV file into a pandas DataFrame.

    Args:
        filename: Path (or any source accepted by ``pd.read_csv``) of the
            CSV to read. Defaults to ``'jobs_data.csv'``.

    Returns:
        The loaded DataFrame, or ``None`` when the file does not exist
        (a message is printed in that case instead of raising).
    """
    try:
        jobs = pd.read_csv(filename)
    except FileNotFoundError:
        # Missing file is reported to the user; callers check for None.
        print(f"File '{filename}' nahi mili!")
        return None
    else:
        print(f"Data loaded: {len(jobs)} jobs found\n")
        return jobs


# ANALYSIS FUNCTIONS

def analyze_top_locations(df, top_n=5):
    """Print and return the locations that appear most often.

    The 'Location' column is tallied with ``value_counts()`` and the first
    ``top_n`` entries of that tally are kept.

    Args:
        df: DataFrame containing a 'Location' column.
        top_n: Number of locations to report. Defaults to 5.

    Returns:
        A pandas Series mapping each reported location to its job count.
    """
    header = f"📍 Top {top_n} Locations with Most Jobs:"
    print(header)
    print("-" * 40)

    tally = df['Location'].value_counts()
    location_counts = tally.head(top_n)

    # Ranks are 1-based for display.
    rank = 0
    for place, total in location_counts.items():
        rank += 1
        print(f"{rank}. {place}: {total} jobs")

    return location_counts


def analyze_top_companies(df, top_n=5):
    """Print and return the companies with the most job postings.

    The 'Company' column is tallied with ``value_counts()`` and the first
    ``top_n`` entries of that tally are kept.

    Args:
        df: DataFrame containing a 'Company' column.
        top_n: Number of companies to report. Defaults to 5.

    Returns:
        A pandas Series mapping each reported company to its posting count.
    """
    header = f"\n Top {top_n} Companies Hiring:"
    print(header)
    print("-" * 40)

    tally = df['Company'].value_counts()
    company_counts = tally.head(top_n)

    # Ranks are 1-based for display.
    rank = 0
    for employer, openings in company_counts.items():
        rank += 1
        print(f"{rank}. {employer}: {openings} openings")

    return company_counts


def analyze_job_titles(df):
    """Print and return the ten most frequent keywords in the job titles.

    Titles are lower-cased and split on whitespace; stop words and words of
    two characters or fewer are discarded before counting. Punctuation is
    not stripped, so "developer," and "developer" count as different words.

    Args:
        df: DataFrame containing a 'Job Title' column.

    Returns:
        A list of up to ten ``(word, count)`` tuples, most frequent first,
        as produced by ``Counter.most_common(10)``.
    """
    print("\n Most Common Job Keywords:")
    print("-" * 40)

    # Missing titles are NaN (a float) and would make str.join raise
    # TypeError, so drop them and coerce the remainder to str.
    titles = df['Job Title'].dropna().astype(str)

    # Combine every title into one string, then break it into words.
    all_titles = ' '.join(titles)
    words = all_titles.lower().split()

    # Filter out common filler words and very short tokens.
    stop_words = {'and', 'or', 'the', 'a', 'an', 'in', 'of', 'for'}
    filtered_words = [
        word for word in words if word not in stop_words and len(word) > 2]

    # Top 10 keywords
    word_freq = Counter(filtered_words).most_common(10)

    for i, (word, count) in enumerate(word_freq, 1):
        print(f"{i}. {word}: {count} times")

    return word_freq

# MAIN ANALYSIS


if __name__ == "__main__":
    # Script entry point: load the CSV, print summary totals, then run
    # each analysis in turn.
    divider = "=" * 50

    print(divider)
    print("DATA ANALYSIS STARTING...")
    print(divider + "\n")

    # load_data() returns None when the file is missing; skip everything
    # in that case.
    df = load_data()

    if df is not None:
        # Headline totals
        company_total = df['Company'].nunique()
        location_total = df['Location'].nunique()
        print(f"Total Jobs: {len(df)}")
        print(f"Total Companies: {company_total}")
        print(f"Total Locations: {location_total}\n")

        # Run the individual analyses
        analyze_top_locations(df, top_n=5)
        analyze_top_companies(df, top_n=5)
        analyze_job_titles(df)

        print("\n" + divider)
        print("Analysis Complete!")
        print(divider)