-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
142 lines (116 loc) · 5.05 KB
/
app.py
File metadata and controls
142 lines (116 loc) · 5.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
def load_data(path, info=True):
if len(path.split(".csv")) >1:
read=pd.read_csv(path)
elif len(path.split(".xlsx")) >1:
read=pd.read_csv(path)
if info:
if len(read) > 0:
print(">>> Data imported sucessfully !!")
print(">>> Dimentions: ")
print("------ rows:- ",read.shape[0], "columns:- ",read.shape[1])
print(">>> Missing Value:")
print(np.where(read.isnull().values.any()==False, "------ No Missing value found !", "------ Dataset contains Missing value !"))
print(np.where(read.isnull().values.any()==True, f"------ missing values: \n{read.isnull().sum()}"," "))
else:
print(">>> Data didnot import :( ")
return read
data=load_data('data/netflix_titles.csv')
netflix_shows = data[data['type']=="TV Show"]
netflix_movie = data[data['type']=="Movie"]
class analyze:
""" This contains analyze of data with different features of Netflix movies and Tv Shows
1. analyze_months_and_shows(self,data):- analyzed movie release date yearly and month wise. data=dataset
2. rating(df):- Visulized the type of rating and the amount of rating of the data.
3. release_year_yearly(df):- amount of movies and shows release yearwise.
4. country_analysis(df, graph=True):- Analyzed the countrywise release of movies and shows.
5. duration_analysis_movies(df):- Analyzed the duration of movies in Netflix.
6. duration_analysis_shows(df):- Analyzed the duration of TV Shows in Netflix.
7. generes_analysis(df):- WordCloud for Genres¶
"""
def __init__(self):
pass
def show_graph(self, df):
plt.figure(figsize=(8,7), dpi=200)
plt.pcolor(df, cmap='afmhot_r')
plt.xticks(np.arange(0.5, len(df.columns), 1), df.columns,fontsize=7)
plt.yticks(np.arange(0.5, len(df.index), 1), df.index,fontsize=7)
plt.title("Netflix content update ", fontsize=12)
cbar=plt.colorbar()
cbar.ax.tick_params(labelsize=8)
cbar.ax.minorticks_on()
plt.show()
def analyze_months_and_shows(self,data):
netflix_date = data[['date_added']].dropna()
netflix_date['year'] = netflix_date['date_added'].apply(lambda x : x.split()[-1])
netflix_date['month'] = netflix_date['date_added'].apply(lambda x: x.split()[0])
month_order=['January', 'February', 'March', 'April','May','June','July','August','September','October','November','December'][::-1]
df = netflix_date.groupby('year')['month'].value_counts().unstack().fillna(0)[month_order].T
self.show_graph(df)
def rating(df):
plt.figure(figsize=(12,10))
ax=sns.countplot(x='rating', data=df, order=df['rating'].value_counts().index[0:len(df['rating'].unique())])
return ax
def release_year_yearly(df):
plt.figure(figsize=(12,10))
ax=sns.countplot(y='release_year', data=df, order=df['release_year'].value_counts().index[0:18])
def country_analysis(df, graph=True):
countries={}
country = list(df['country'].fillna('Unknown'))
for i in country:
i=list(i.split(','))
if len(i)==1:
if i in list(countries.keys()):
print(i)
countries[i]+=1
else:
countries[i[0]]=1
else:
for j in i:
if j in list(countries.keys()):
countries[j]+=1
else:
countries[j]=1
final_countries={}
for country, no in countries.items():
country=country.replace(' ', '')
if country in list(final_countries.keys()):
final_countries[country]+=no
else:
final_countries[country]=no
final_countries={k:v for k, v in sorted(final_countries.items(), key=lambda item : item[1] , reverse=True)}
if graph==True:
plt.figure(figsize=(12,10))
ax=sns.barplot(x=list(final_countries.keys())[0:10], y=list(final_countries.values())[0:10])
return final_countries
def duration_analysis_movies(df):
a=df['duration'].str.replace(' min','')
a=a.fillna(0)
a=a.astype(str).astype(int)
sns.kdeplot(data=a, shade=True)
return a
def duration_analysis_shows(df):
a=df['duration'].apply(lambda x: x.split(' ')[0])
a=a.fillna(0)
a=a.astype(str).astype(int)
sns.kdeplot(data=a,shade=True)
return a
def generes_analysis(df):
generes = list(df['listed_in'])
gen=[]
for i in generes:
i = list(i.split(','))
for j in i:
gen.append(j.replace(' ',''))
g=Counter(gen)
text = list(set(gen))
plt.rcParams['figure.figsize'] = (13, 13)
wordcloud = WordCloud(max_words=1000000,background_color="white").generate(str(text))
plt.imshow(wordcloud,interpolation="bilinear")
plt.axis("off")
plt.show()
return g