# Importer les packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the movies dataset from the CSV file into a DataFrame.
# low_memory=False asks pandas to parse the file in a single pass instead of
# in chunks, which avoids mixed-dtype inference on columns.
df_movies = pd.read_csv('/content/drive/MyDrive/Movie project/tmdb_movies_data.csv', low_memory=False)
# Preview the first two rows.
df_movies.head(2)
id | imdb_id | popularity | budget | revenue | original_title | cast | homepage | director | tagline | ... | overview | runtime | genres | production_companies | release_date | vote_count | vote_average | release_year | budget_adj | revenue_adj | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 135397 | tt0369610 | 32.985763 | 150000000 | 1513528810 | Jurassic World | Chris Pratt|Bryce Dallas Howard|Irrfan Khan|Vi... | http://www.jurassicworld.com/ | Colin Trevorrow | The park is open. | ... | Twenty-two years after the events of Jurassic ... | 124 | Action|Adventure|Science Fiction|Thriller | Universal Studios|Amblin Entertainment|Legenda... | 6/9/2015 | 5562 | 6.5 | 2015 | 137999939.3 | 1.392446e+09 |
1 | 76341 | tt1392190 | 28.419936 | 150000000 | 378436354 | Mad Max: Fury Road | Tom Hardy|Charlize Theron|Hugh Keays-Byrne|Nic... | http://www.madmaxmovie.com/ | George Miller | What a Lovely Day. | ... | An apocalyptic story set in the furthest reach... | 120 | Action|Adventure|Science Fiction|Thriller | Village Roadshow Pictures|Kennedy Miller Produ... | 5/13/2015 | 6185 | 7.1 | 2015 | 137999939.3 | 3.481613e+08 |
2 rows × 21 columns
# Display the DataFrame dimensions as a (rows, columns) tuple.
df_movies.shape
(10866, 21)
Nous avons 10866 lignes et 21 colonnes.
# Row count obtained with len(); this is the number of rows in the DataFrame.
len(df_movies)
10866
# Row count read from the first element of the shape tuple.
df_movies.shape[0]
10866
# Print each column's dtype and non-null count to spot missing data.
df_movies.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 10866 entries, 0 to 10865 Data columns (total 21 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 10866 non-null int64 1 imdb_id 10856 non-null object 2 popularity 10866 non-null float64 3 budget 10866 non-null int64 4 revenue 10866 non-null int64 5 original_title 10866 non-null object 6 cast 10790 non-null object 7 homepage 2936 non-null object 8 director 10822 non-null object 9 tagline 8042 non-null object 10 keywords 9373 non-null object 11 overview 10862 non-null object 12 runtime 10866 non-null int64 13 genres 10843 non-null object 14 production_companies 9836 non-null object 15 release_date 10866 non-null object 16 vote_count 10866 non-null int64 17 vote_average 10866 non-null float64 18 release_year 10866 non-null int64 19 budget_adj 10866 non-null float64 20 revenue_adj 10866 non-null float64 dtypes: float64(4), int64(6), object(11) memory usage: 1.7+ MB
# Descriptive statistics (count, mean, std, min, quartiles, max) for the
# numeric columns.
df_movies.describe()
id | popularity | budget | revenue | runtime | vote_count | vote_average | release_year | budget_adj | revenue_adj | |
---|---|---|---|---|---|---|---|---|---|---|
count | 10866.000000 | 10866.000000 | 1.086600e+04 | 1.086600e+04 | 10866.000000 | 10866.000000 | 10866.000000 | 10866.000000 | 1.086600e+04 | 1.086600e+04 |
mean | 66064.177434 | 0.646441 | 1.462570e+07 | 3.982332e+07 | 102.070863 | 217.389748 | 5.974922 | 2001.322658 | 1.755104e+07 | 5.136436e+07 |
std | 92130.136561 | 1.000185 | 3.091321e+07 | 1.170035e+08 | 31.381405 | 575.619058 | 0.935142 | 12.812941 | 3.430616e+07 | 1.446325e+08 |
min | 5.000000 | 0.000065 | 0.000000e+00 | 0.000000e+00 | 0.000000 | 10.000000 | 1.500000 | 1960.000000 | 0.000000e+00 | 0.000000e+00 |
25% | 10596.250000 | 0.207583 | 0.000000e+00 | 0.000000e+00 | 90.000000 | 17.000000 | 5.400000 | 1995.000000 | 0.000000e+00 | 0.000000e+00 |
50% | 20669.000000 | 0.383856 | 0.000000e+00 | 0.000000e+00 | 99.000000 | 38.000000 | 6.000000 | 2006.000000 | 0.000000e+00 | 0.000000e+00 |
75% | 75610.000000 | 0.713817 | 1.500000e+07 | 2.400000e+07 | 111.000000 | 145.750000 | 6.600000 | 2011.000000 | 2.085325e+07 | 3.369710e+07 |
max | 417859.000000 | 32.985763 | 4.250000e+08 | 2.781506e+09 | 900.000000 | 9767.000000 | 9.200000 | 2015.000000 | 4.250000e+08 | 2.827124e+09 |
# Count rows that are exact duplicates of an earlier row.
df_movies.duplicated().sum()
1
# Remove duplicated rows, modifying the DataFrame in place.
df_movies.drop_duplicates(inplace=True)
# Check the result: the duplicate count should now be 0.
df_movies.duplicated().sum()
0
# Count missing values in each column.
df_movies.isnull().sum()
id 0 imdb_id 10 popularity 0 budget 0 revenue 0 original_title 0 cast 76 homepage 7929 director 44 tagline 2824 keywords 1493 overview 4 runtime 0 genres 23 production_companies 1030 release_date 0 vote_count 0 vote_average 0 release_year 0 budget_adj 0 revenue_adj 0 dtype: int64
# Total number of missing values across the whole DataFrame.
df_movies.isnull().sum().sum()
13433
# List the column names (used to choose which columns to drop).
df_movies.columns
Index(['id', 'imdb_id', 'popularity', 'budget', 'revenue', 'original_title', 'cast', 'homepage', 'director', 'tagline', 'keywords', 'overview', 'runtime', 'genres', 'production_companies', 'release_date', 'vote_count', 'vote_average', 'release_year', 'budget_adj', 'revenue_adj'], dtype='object')
# Columns that are removed from the DataFrame before the analysis continues.
deleted_columns = [
    'id',
    'imdb_id',
    'budget',
    'revenue',
    'cast',
    'homepage',
    'director',
    'tagline',
    'keywords',
    'overview',
    'runtime',
    'production_companies',
    'release_date',
]
# Drop them in place, then preview the remaining columns.
df_movies.drop(columns=deleted_columns, inplace=True)
df_movies.head()
popularity | original_title | genres | vote_count | vote_average | release_year | budget_adj | revenue_adj | |
---|---|---|---|---|---|---|---|---|
0 | 32.985763 | Jurassic World | Action|Adventure|Science Fiction|Thriller | 5562 | 6.5 | 2015 | 137999939.3 | 1.392446e+09 |
1 | 28.419936 | Mad Max: Fury Road | Action|Adventure|Science Fiction|Thriller | 6185 | 7.1 | 2015 | 137999939.3 | 3.481613e+08 |
2 | 13.112507 | Insurgent | Adventure|Science Fiction|Thriller | 2480 | 6.3 | 2015 | 101199955.5 | 2.716190e+08 |
3 | 11.173104 | Star Wars: The Force Awakens | Action|Adventure|Science Fiction|Fantasy | 5292 | 7.5 | 2015 | 183999919.0 | 1.902723e+09 |
4 | 9.335014 | Furious 7 | Action|Crime|Thriller | 2947 | 7.3 | 2015 | 174799923.1 | 1.385749e+09 |
# Check for missing values in the remaining columns.
df_movies.isnull().sum()
popularity 0 original_title 0 genres 23 vote_count 0 vote_average 0 release_year 0 budget_adj 0 revenue_adj 0 dtype: int64
# Drop every row that contains at least one missing value, in place.
df_movies.dropna(inplace=True)
# Display the dimensions after the drop.
df_movies.shape
(10842, 8)
# Re-check for missing values after dropping incomplete rows.
df_movies.isnull().sum()
popularity 0 original_title 0 genres 0 vote_count 0 vote_average 0 release_year 0 budget_adj 0 revenue_adj 0 dtype: int64
"genres"
Regular Expressions
# Inspect the raw genres string of the first row. Positional access (.iloc)
# is used because rows were dropped earlier without resetting the index, so a
# row labelled 0 is not guaranteed to exist.
df_movies['genres'].iloc[0]
'Action|Adventure|Science Fiction|Thriller'
# Split each pipe-delimited genres string into a list of individual genres,
# producing one list per film. The original loop body had lost its
# indentation, which made the cell a syntax error.
list_genre = [genres.split('|') for genres in df_movies['genres']]
list_genre
[['Action', 'Adventure', 'Science Fiction', 'Thriller'], ['Action', 'Adventure', 'Science Fiction', 'Thriller'], ['Adventure', 'Science Fiction', 'Thriller'], ['Action', 'Adventure', 'Science Fiction', 'Fantasy'], ['Action', 'Crime', 'Thriller'], ['Western', 'Drama', 'Adventure', 'Thriller'], ['Science Fiction', 'Action', 'Thriller', 'Adventure'], ['Drama', 'Adventure', 'Science Fiction'], ['Family', 'Animation', 'Adventure', 'Comedy'], ['Comedy', 'Animation', 'Family'], ['Action', 'Adventure', 'Crime'], ['Science Fiction', 'Fantasy', 'Action', 'Adventure'], ['Drama', 'Science Fiction'], ['Action', 'Comedy', 'Science Fiction'], ['Action', 'Adventure', 'Science Fiction'], ['Crime', 'Drama', 'Mystery', 'Western'], ['Crime', 'Action', 'Thriller'], ['Science Fiction', 'Action', 'Adventure'], ['Romance', 'Fantasy', 'Family', 'Drama'], ['War', 'Adventure', 'Science Fiction'], ['Action', 'Family', 'Science Fiction', 'Adventure', 'Mystery'], ['Action', 'Drama'], ['Action', 'Drama', 'Thriller'], ['Drama', 'Romance'], ['Comedy', 'Drama'], ['Action'], ['Comedy'], ['Crime', 'Comedy', 'Action', 'Adventure'], ['Drama', 'Thriller', 'History'], ['Action', 'Science Fiction', 'Thriller'], ['Mystery', 'Drama'], ['Crime', 'Action', 'Science Fiction'], ['Comedy', 'Music'], ['Thriller', 'Drama'], ['Adventure', 'Horror', 'Comedy'], ['Drama', 'Thriller'], ['Crime', 'Drama', 'Mystery'], ['Adventure', 'Animation', 'Comedy', 'Family', 'Fantasy'], ['Action', 'Crime', 'Drama', 'Mystery', 'Thriller'], ['Drama', 'Romance'], ['Drama', 'Music'], ['Fantasy', 'Action', 'Adventure'], ['History', 'Drama'], ['Comedy', 'Action', 'Adventure'], ['Fantasy', 'Drama', 'Romance'], ['Action', 'Adventure', 'Science Fiction'], ['Fantasy', 'Comedy', 'Animation', 'Science Fiction', 'Family'], ['Drama', 'Mystery', 'Romance'], ['Thriller', 'Crime', 'Drama'], ['Thriller', 'Comedy', 'Drama', 'Romance', 'Science Fiction'], ['Romance', 'Drama'], ['Comedy'], ['Adventure', 'Drama'], ['Comedy', 'Drama'], ['Action', 
'Crime', 'Thriller'], ['Drama'], ['Science Fiction', 'Mystery', 'Thriller'], ['Comedy', 'Adventure'], ['Drama'], ['Mystery', 'Crime', 'Action', 'Thriller', 'Drama'], ['Action', 'Crime', 'Drama', 'Mystery', 'Thriller'], ['Action', 'Adventure', 'Science Fiction'], ['Mystery', 'Horror'], ['Action', 'Comedy', 'Crime'], ['Romance', 'Comedy', 'Crime', 'Drama'], ['Action', 'Crime', 'Thriller'], ['Thriller', 'Drama', 'Adventure', 'Action', 'History'], ['Crime', 'Thriller', 'Action'], ['Drama'], ['Comedy'], ['Drama', 'Thriller', 'War'], ['Crime', 'Thriller'], ['Thriller'], ['Adventure', 'Family', 'Fantasy'], ['Action', 'Adventure', 'Fantasy'], ['Comedy', 'Drama'], ['Adventure', 'Animation', 'Comedy', 'Family'], ['Drama'], ['Comedy'], ['Drama', 'Horror', 'Thriller'], ['Romance', 'Drama'], ['Animation', 'Comedy', 'Family'], ['Family', 'Comedy'], ['Adventure', 'Drama', 'Thriller'], ['Action', 'Crime', 'Drama'], ['Adventure', 'Comedy'], ['Horror', 'Thriller'], ['Horror'], ['Drama', 'Romance', 'Science Fiction'], ['Crime', 'Thriller'], ['Thriller', 'Mystery'], ['Comedy'], ['Fantasy', 'Action', 'Adventure'], ['Thriller', 'Science Fiction', 'Action', 'Adventure'], ['Adventure', 'Animation', 'Fantasy'], ['Adventure', 'Animation', 'Comedy', 'Family'], ['Drama'], ['Romance', 'Comedy'], ['Horror'], ['Action', 'Adventure', 'Comedy', 'Family'], ['Adventure', 'Animation', 'Family'], ['Action', 'Drama', 'Science Fiction'], ['Thriller'], ['Thriller', 'Action'], ['Comedy'], ['Comedy'], ['Comedy', 'Horror'], ['Horror', 'Thriller'], ['Crime', 'Drama'], ['Crime', 'Action', 'Thriller'], ['Horror', 'Comedy', 'Fantasy'], ['Drama', 'Mystery', 'Thriller'], ['Action', 'Thriller'], ['Crime', 'Drama', 'Comedy'], ['Comedy', 'Action'], ['Drama'], ['Action', 'Fantasy', 'Adventure'], ['Comedy'], ['Science Fiction', 'Thriller'], ['Comedy', 'Science Fiction'], ['Thriller', 'Action', 'Crime'], ['Mystery', 'Thriller', 'Fantasy', 'Horror', 'Drama'], ['Thriller', 'Action'], ['Comedy', 'Drama', 'Music'], 
['Horror', 'Thriller'], ['Romance', 'Thriller', 'Western'], ['Drama'], ['Crime', 'Drama', 'Mystery'], ['Comedy'], ['Family', 'Animation'], ['Crime', 'Drama', 'Mystery'], ['Adventure', 'Drama', 'Family'], ['Family', 'Animation', 'Comedy', 'Adventure'], ['Drama'], ['Comedy', 'Drama'], ['Action', 'Drama', 'Crime'], ['Horror', 'Thriller'], ['Action', 'Crime', 'Comedy'], ['Drama', 'Horror', 'Thriller'], ['Drama', 'Science Fiction', 'Thriller'], ['Action', 'Adventure', 'Fantasy'], ['Comedy', 'Drama'], ['Drama', 'Science Fiction', 'Thriller'], ['Adventure', 'Drama', 'Family'], ['Animation', 'Comedy', 'Drama', 'Romance'], ['Horror', 'Western', 'Adventure', 'Drama'], ['Horror', 'Mystery', 'Thriller'], ['Drama'], ['Drama'], ['Thriller', 'Action'], ['Drama', 'Romance'], ['Horror', 'Thriller'], ['Horror', 'Thriller', 'Comedy', 'Crime'], ['Action', 'Adventure'], ['Romance', 'Fantasy', 'Horror'], ['Mystery', 'Drama'], ['Drama', 'History'], ['Comedy', 'Drama', 'Music'], ['Comedy', 'Drama', 'Romance'], ['Action', 'Comedy', 'Science Fiction', 'Fantasy'], ['Drama', 'Thriller'], ['Crime', 'Drama', 'Mystery', 'Thriller'], ['Drama', 'Comedy'], ['Comedy'], ['Drama', 'Action', 'Crime', 'Thriller'], ['Comedy', 'Western'], ['Comedy', 'Thriller'], ['History', 'Drama'], ['Drama', 'History'], ['Drama'], ['Drama', 'Music', 'Romance'], ['Adventure', 'Action', 'Comedy'], ['History', 'Drama', 'War'], ['Action', 'Thriller'], ['Comedy', 'Drama'], ['Music', 'Action', 'Adventure', 'Comedy', 'Family'], ['Music', 'Romance', 'Comedy'], ['Romance', 'Drama'], ['Drama'], ['Drama', 'Comedy'], ['Comedy', 'Drama', 'War'], ['Action', 'Adventure', 'Animation', 'Family'], ['Drama', 'Thriller'], ['Adventure', 'Comedy', 'Romance'], ['Romance', 'Comedy'], ['Action', 'Animation', 'Science Fiction'], ['Drama', 'Comedy'], ['Drama', 'Science Fiction'], ['Thriller'], ['Drama'], ['Animation', 'Comedy', 'Family', 'Adventure'], ['Thriller'], ['Thriller'], ['Comedy'], ['Western', 'Drama'], ['Documentary', 'Music'], 
['Drama'], ['Fantasy', 'Horror'], ['Family', 'Animation'], ['Horror'], ['Romance', 'Drama'], ['Romance', 'Comedy'], ['Drama', 'Romance'], ['Drama'], ['Thriller'], ['Comedy', 'Drama', 'Music'], ['Comedy', 'Music', 'War'], ['Drama'], ['Comedy', 'Drama', 'Romance'], ['Thriller', 'Horror'], ['Drama'], ['Comedy'], ['Romance', 'Drama'], ['Drama'], ['Drama'], ['Mystery', 'Science Fiction', 'Thriller'], ['Animation', 'Comedy', 'Family', 'Fantasy', 'Music'], ['Thriller', 'Science Fiction'], ['Action'], ['Drama', 'Thriller'], ['Drama'], ['Drama', 'Horror', 'Science Fiction'], ['Mystery', 'Drama', 'Horror'], ['Drama', 'Adventure', 'Comedy'], ['Horror'], ['Thriller', 'Horror'], ['Action', 'Adventure', 'Animation'], ['Thriller'], ['Adventure', 'Documentary'], ['Comedy'], ['Fantasy', 'Thriller'], ['Music', 'Documentary'], ['War', 'Drama'], ['Thriller', 'Crime', 'Drama'], ['Drama'], ['Drama'], ['Horror', 'Mystery'], ['Drama'], ['Drama', 'Comedy'], ['Animation', 'Adventure', 'Comedy'], ['Thriller', 'Horror'], ['Documentary'], ['Horror', 'Thriller'], ['Drama', 'Comedy'], ['Horror', 'Thriller'], ['History', 'Drama'], ['Drama', 'Romance'], ['Drama'], ['Horror', 'TV Movie'], ['Thriller', 'Horror'], ['Action'], ['Drama', 'Thriller'], ['Crime', 'Drama'], ['Drama', 'Thriller'], ['Thriller', 'Horror'], ['Fantasy', 'Comedy', 'Animation'], ['Adventure', 'Science Fiction', 'Action'], ['Action'], ['Action', 'Adventure', 'Animation', 'Family'], ['TV Movie'], ['Action', 'Drama', 'Family'], ['Drama'], ['Horror', 'Thriller'], ['Horror', 'Thriller'], ['Drama', 'Documentary'], ['Comedy'], ['Comedy'], ['Music', 'Drama', 'Crime', 'Comedy'], ['Comedy', 'Action'], ['Music', 'Animation', 'Family', 'Fantasy'], ['Drama'], ['Horror', 'Science Fiction'], ['Comedy', 'Horror'], ['Action', 'Animation', 'Fantasy'], ['Drama', 'Music'], ['Action', 'Drama', 'Thriller'], ['Fantasy', 'Thriller', 'Horror', 'Science Fiction'], ['Action', 'Science Fiction', 'Adventure', 'Fantasy'], ['Thriller', 'Drama', 'Horror'], 
['Animation', 'Family'], ['Drama'], ['Thriller', 'Drama'], ['Horror', 'Comedy'], ['Thriller', 'Horror', 'Comedy', 'Drama'], ['Comedy', 'Music'], ['Thriller', 'Comedy', 'Crime'], ['Family', 'Science Fiction', 'Action', 'Adventure'], ['Drama', 'Music', 'Adventure', 'Fantasy'], ['Thriller', 'Action', 'Drama'], ['Action', 'Horror', 'Thriller'], ['Documentary'], ['Drama', 'War'], ['Horror'], ['Drama'], ['Thriller', 'Mystery', 'Science Fiction'], ['Family', 'TV Movie'], ['Thriller', 'Horror'], ['Thriller', 'Horror'], ['Crime', 'Horror', 'Thriller'], ['Thriller', 'Drama', 'Comedy'], ['Drama'], ['Thriller', 'Action'], ['Drama'], ['Fantasy', 'Action', 'Adventure'], ['Drama', 'Comedy'], ['Family', 'TV Movie', 'Animation'], ['Thriller', 'Romance', 'Drama'], ['Science Fiction', 'Action'], ['Drama', 'Music'], ['Thriller', 'Action'], ['Horror'], ['Adventure', 'Action'], ['Thriller', 'Action', 'Drama'], ['Horror'], ['Horror', 'Romance', 'Science Fiction'], ['Thriller'], ['Horror'], ['Animation', 'Family', 'Fantasy'], ['Science Fiction', 'Action', 'Horror'], ['Horror', 'Action'], ['Thriller', 'Action', 'Horror'], ['Comedy', 'Romance'], ['Drama', 'Horror', 'Thriller'], ['Thriller', 'Drama', 'Science Fiction'], ['TV Movie', 'Adventure', 'Comedy'], ['Horror', 'Thriller', 'Science Fiction'], ['TV Movie', 'Action', 'Science Fiction'], ['Drama'], ['Science Fiction', 'Drama', 'Family'], ['Thriller', 'Drama'], ['Comedy', 'Horror'], ['Drama', 'Thriller'], ['Drama', 'Romance', 'Comedy'], ['Crime', 'Drama', 'Thriller'], ['Documentary'], ['Documentary'], ['Thriller', 'Drama'], ['War', 'Action'], ['Adventure', 'Action', 'Comedy', 'Science Fiction'], ['Animation', 'Music', 'Family'], ['Drama', 'Horror', 'Science Fiction', 'Thriller'], ['Animation', 'Documentary'], ['Comedy'], ['Animation'], ['Drama', 'Comedy'], ['Drama', 'TV Movie', 'Crime', 'Mystery'], ['Documentary'], ['Drama', 'TV Movie'], ['Thriller'], ['Science Fiction', 'Action', 'Horror'], ['Crime', 'Drama', 'Thriller'], ['Science 
Fiction', 'Thriller', 'Fantasy'], ['Action', 'Science Fiction'], ['Comedy'], ['Science Fiction', 'Comedy', 'Horror'], ['Thriller', 'Horror'], ['Action', 'Animation', 'Family'], ['Action', 'Adventure', 'Science Fiction', 'Romance'], ['Drama'], ['Science Fiction', 'Horror'], ['Action', 'Animation', 'Family'], ['Drama', 'Comedy'], ['Drama'], ['Thriller', 'Drama'], ['Drama', 'History', 'Romance'], ['Drama', 'Music', 'TV Movie'], ['Horror'], ['Thriller', 'Horror'], ['Drama'], ['Family', 'Adventure'], ['Adventure', 'Documentary'], ['Animation'], ['Romance', 'Comedy', 'Drama'], ['Romance', 'Comedy'], ['Mystery', 'Drama', 'Thriller'], ['TV Movie', 'Horror', 'Science Fiction'], ['Action', 'Thriller'], ['Documentary'], ['Drama'], ['Drama', 'Crime'], ['Drama'], ['Horror', 'Science Fiction', 'TV Movie'], ['Drama', 'Comedy'], ['Science Fiction', 'Action', 'Animation', 'Adventure'], ['Romance', 'Drama'], ['Horror'], ['Drama'], ['Drama', 'Horror', 'Thriller'], ['Drama', 'Comedy'], ['Drama', 'Thriller', 'Horror'], ['Drama', 'Thriller', 'Mystery'], ['Drama', 'Science Fiction', 'Thriller'], ['Horror'], ['Comedy', 'Horror'], ['Romance', 'Comedy'], ['Horror', 'Science Fiction', 'Thriller'], ['History', 'Documentary', 'Family'], ['Horror', 'Comedy', 'Romance'], ['Horror'], ['Horror', 'Thriller'], ['Horror'], ['Drama', 'Comedy'], ['Science Fiction', 'Horror'], ['Drama'], ['Music', 'Comedy', 'Drama'], ['Horror'], ['Drama'], ['Thriller'], ['Comedy'], ['Drama', 'Comedy'], ['Horror'], ['Horror'], ['Thriller', 'Horror'], ['Romance', 'Comedy'], ['Romance', 'Drama'], ['History', 'Crime', 'Drama'], ['Horror', 'Thriller'], ['Action'], ['Drama', 'Thriller'], ['Action', 'Drama', 'Thriller'], ['Documentary'], ['Romance', 'Drama', 'Comedy', 'Mystery'], ['Fantasy', 'Drama'], ['TV Movie', 'Comedy'], ['Comedy', 'Music'], ['History', 'Drama', 'Family'], ['Thriller', 'Mystery', 'Science Fiction'], ['Comedy'], ['Drama'], ['Comedy'], ['Crime', 'Action'], ['Science Fiction', 'Drama', 'Thriller'], ['Drama', 
'Mystery', 'Thriller'], ['Comedy'], ['Comedy', 'TV Movie', 'Romance'], ['Documentary', 'Music'], ['Drama'], ['Drama'], ['Comedy'], ['Documentary'], ['Documentary'], ['Mystery', 'Thriller', 'Horror', 'Science Fiction'], ['Drama', 'Romance'], ['Thriller', 'Horror', 'Science Fiction'], ['Horror'], ['Horror', 'Science Fiction', 'Thriller'], ['Comedy', 'Drama', 'Romance'], ['Drama', 'Thriller', 'Horror'], ['Horror', 'Science Fiction', 'Thriller'], ['Romance', 'Music', 'TV Movie', 'Comedy'], ['Horror'], ['Horror'], ['Drama', 'Music'], ['Horror'], ['Documentary', 'Music'], ['Science Fiction', 'Comedy', 'Animation', 'Family'], ['Thriller', 'Horror'], ['Drama', 'Comedy'], ['Documentary'], ['Documentary'], ['Documentary'], ['Comedy'], ['Mystery', 'Horror', 'Comedy'], ['Drama'], ['Documentary'], ['Science Fiction', 'Horror'], ['Mystery', 'Comedy'], ['Crime'], ['Comedy', 'Action', 'Adventure'], ['Documentary'], ['Thriller', 'Crime'], ['Drama', 'Science Fiction', 'Thriller'], ['Action', 'Crime', 'Drama', 'Thriller'], ['Comedy'], ['Comedy'], ['Drama', 'Thriller', 'Science Fiction'], ['Drama', 'Family'], ['Thriller', 'Action', 'Adventure', 'Crime'], ['Action', 'Comedy', 'Science Fiction'], ['Action'], ['Thriller', 'Drama'], ['Horror', 'Science Fiction'], ['Action', 'Comedy', 'Romance'], ['Crime', 'Action'], ['Comedy', 'TV Movie', 'Animation', 'Science Fiction'], ['TV Movie', 'Drama'], ['Thriller', 'Horror', 'Mystery'], ['Drama', 'Music'], ['Drama'], ['Comedy', 'Horror'], ['Horror', 'Animation', 'Mystery'], ['Drama'], ['Drama', 'Thriller'], ['Drama'], ['Comedy', 'Drama'], ['Documentary', 'Music'], ['Romance', 'Comedy'], ['Thriller'], ['Thriller', 'Horror'], ['Comedy'], ['Drama'], ['Romance', 'Drama', 'Music'], ['Drama'], ['Music', 'Documentary'], ['Action', 'Crime', 'Drama', 'Thriller'], ['Horror'], ['Drama', 'Horror'], ['Horror'], ['Science Fiction', 'Horror'], ['Action', 'Science Fiction'], ['Romance', 'Drama'], ['Documentary'], ['Drama'], ['Documentary'], ['Drama', 'History', 
'Thriller', 'Science Fiction'], ['Horror', 'Documentary'], ['Comedy', 'Drama'], ['Thriller', 'Horror'], ['Drama', 'Thriller'], ['Action', 'Adventure', 'Science Fiction'], ['Romance', 'Comedy', 'Drama'], ['Drama', 'Comedy'], ['Documentary'], ['Drama'], ['Fantasy', 'Adventure', 'Comedy'], ['Romance', 'Comedy', 'Drama', 'Mystery'], ['Documentary'], ['Romance', 'TV Movie'], ['Horror'], ['Drama'], ['Mystery', 'Horror', 'Thriller'], ['Comedy'], ['Thriller'], ['Drama'], ['Documentary'], ['Documentary'], ['Thriller', 'Mystery', 'Drama', 'Horror'], ['Documentary'], ['Music', 'Drama', 'Documentary', 'Crime'], ['Mystery', 'Drama'], ['Comedy', 'Horror'], ['Horror', 'Thriller'], ['Comedy'], ['Drama', 'Romance', 'Comedy'], ['Documentary'], ['Horror'], ['Drama'], ['Documentary', 'Science Fiction'], ['Thriller', 'Drama'], ['Thriller', 'Documentary'], ['Documentary'], ['Horror'], ['Drama', 'Science Fiction', 'Comedy'], ['Documentary'], ['Thriller', 'Drama', 'Action', 'Crime'], ['Comedy', 'Romance'], ['Family', 'Action', 'Drama'], ['Horror'], ['Thriller', 'TV Movie', 'Science Fiction', 'Drama'], ['Thriller', 'Mystery', 'Horror', 'Fantasy'], ['Comedy', 'Drama', 'Romance'], ['Mystery', 'Drama', 'History', 'Family', 'Thriller'], ['Horror'], ['Music', 'Documentary'], ['Horror', 'Thriller'], ['Animation', 'Comedy', 'Family'], ['Comedy'], ['Crime', 'Thriller', 'Action', 'Horror'], ['Drama', 'Horror'], ['Drama'], ['Thriller', 'Adventure', 'Drama'], ['Comedy'], ['Comedy'], ['Documentary'], ['Action', 'Drama'], ['Comedy'], ['Drama'], ['Documentary'], ['Drama'], ['Drama', 'Horror'], ['Romance', 'Comedy'], ['Thriller', 'Crime', 'Documentary'], ['Drama'], ['Horror', 'Thriller', 'Science Fiction'], ['Drama', 'Horror', 'Thriller'], ['Comedy', 'Drama'], ['Documentary'], ['Thriller', 'Drama', 'Fantasy'], ['Documentary'], ['Drama', 'TV Movie'], ['Thriller', 'Drama'], ['Documentary'], ['Documentary'], ['Romance', 'War', 'Documentary', 'Drama'], ['Horror'], ['Documentary', 'History'], ['Action'], 
['Music', 'Drama'], ['Drama', 'Thriller', 'Horror', 'Mystery'], ['Science Fiction', 'Action', 'Animation'], ['Drama', 'Comedy'], ['Comedy'], ['Comedy'], ['Thriller', 'Science Fiction'], ['Science Fiction', 'Comedy'], ['Comedy'], ['Comedy'], ['Family', 'Documentary'], ['Documentary'], ['Thriller', 'Mystery', 'Horror', 'Drama'], ['Animation', 'Comedy', 'Science Fiction'], ['Documentary'], ['Drama'], ['Drama', 'Thriller'], ['Thriller'], ['Drama', 'Comedy'], ['Horror', 'Comedy', 'Drama'], ['Drama', 'TV Movie'], ['Science Fiction', 'Mystery', 'Horror', 'Thriller'], ['Horror'], ['Documentary'], ['Documentary'], ['Comedy', 'Drama', 'Family'], ['Comedy'], ['Documentary'], ['Documentary'], ['Documentary'], ['Adventure', 'Drama', 'Science Fiction'], ['Action', 'Science Fiction', 'Adventure'], ['Action', 'Adventure', 'Science Fiction'], ['Action', 'Thriller'], ['Science Fiction', 'Adventure', 'Thriller'], ['Adventure', 'Fantasy'], ['Adventure', 'Family', 'Animation', 'Action', 'Comedy'], ['History', 'Drama', 'Thriller', 'War'], ['Action', 'Mystery', 'Science Fiction', 'Thriller'], ['Drama', 'Comedy'], ['Mystery', 'Thriller', 'Drama'], ['War', 'Drama', 'Action'], ['Adventure', 'Comedy', 'Fantasy', 'Family'], ['Action', 'Science Fiction', 'Thriller'], ['Action', 'Adventure', 'Fantasy', 'Science Fiction'], ['Action', 'Science Fiction'], ['Science Fiction', 'Action', 'Adventure', 'Fantasy', 'Comedy'], ['Comedy'], ['Crime', 'Drama', 'Thriller'], ['Science Fiction', 'Action', 'Adventure'], ['Comedy', 'Drama'], ['Drama', 'Music'], ['Science Fiction', 'Action', 'Drama', 'Thriller'], ['Thriller', 'Action', 'Crime'], ['Comedy'], ['Action', 'Adventure', 'Fantasy'], ['Action', 'Science Fiction'], ['Action', 'Comedy'], ['War', 'Action'], ['Adventure', 'Drama', 'Action'], ['Action', 'Science Fiction', 'Thriller'], ['Fantasy', 'Adventure', 'Action', 'Family', 'Romance'], ['Crime', 'Action', 'Thriller'], ['Horror'], ['Fantasy', 'Action', 'Adventure', 'Animation', 'Comedy'], ['Thriller', 
'Horror'], ['Action', 'Adventure', 'Thriller'], ['Adventure', 'Fantasy', 'Animation', 'Comedy', 'Action'], ['Thriller'], ['Adventure', 'Fantasy'], ['Drama', 'Romance'], ['Comedy'], ['Horror', 'Action', 'Drama', 'Fantasy', 'War'], ['Family', 'Animation', 'Adventure', 'Comedy'], ['Fantasy', 'Comedy', 'Music'], ['Action', 'War'], ['Drama', 'Science Fiction'], ['Crime', 'Comedy', 'Action'], ['Crime', 'Drama', 'Mystery', 'Thriller'], ['Comedy'], ['Drama'], ['Comedy'], ['Horror', 'Thriller'], ['Thriller', 'Action', 'Science Fiction'], ['Drama'], ['Action', 'Thriller', 'Crime', 'Drama'], ['Action', 'Adventure'], ['Drama', 'Adventure'], ['Drama'], ['Action', 'Comedy', 'Crime'], ['Comedy'], ['Comedy'], ['Drama'], ['Romance', 'Drama', 'Music'], ['Comedy', 'Romance'], ['Romance', 'Drama'], ['Science Fiction', 'Drama'], ['Action', 'Comedy'], ['Thriller'], ['Comedy', 'Romance', 'Crime', 'Drama', 'Mystery'], ['Action', 'Drama', 'Thriller'], ['Action', 'Thriller', 'Mystery'], ['Science Fiction', 'Thriller'], ['Thriller', 'Science Fiction'], ['Action', 'Adventure', 'Fantasy'], ['Drama', 'Crime'], ['Science Fiction', 'Action'], ['Thriller', 'Science Fiction', 'Drama', 'Mystery'], ['Comedy', 'Romance'], ['Animation', 'Adventure', 'Family'], ['Comedy'], ['Crime', 'Thriller'], ['Adventure', 'Animation', 'Family'], ['Animation', 'Adventure', 'Comedy', 'Family'], ['Comedy'], ['Mystery', 'Thriller'], ['War', 'Drama', 'History', 'Action'], ['Drama', 'War'], ['History', 'Drama'], ['Drama'], ['Thriller', 'Romance'], ['Animation', 'Family'], ['Drama'], ['Action', 'Adventure', 'History', 'Romance', 'Drama'], ['Drama', 'Romance'], ['Music', 'Comedy', 'Drama', 'Family'], ['Horror', 'Thriller'], ['Romance', 'Animation', 'Adventure', 'Comedy', 'Family'], ['Comedy'], ['Action', 'Crime', 'Drama', 'Thriller'], ['Romance', 'Comedy'], ['Thriller', 'Crime', 'Action'], ['Drama'], ['Animation', 'Comedy', 'Family', 'Fantasy'], ['Drama'], ['Thriller', 'Action'], ['Adventure'], ['Family', 'Animation', 
'Fantasy'], ['Comedy', 'Action', 'Drama', 'Thriller', 'Fantasy'], ['Thriller', 'Science Fiction'], ['Thriller', 'Drama'], ['Action', 'Thriller'], ['Action'], ['Action', 'Drama', 'Thriller', 'Crime'], ['Animation', 'Comedy', 'Adventure', 'Family'], ['Thriller', 'Action', 'Drama', 'War'], ['Action', 'Drama', 'Thriller', 'Crime'], ['Comedy', 'Western'], ['Thriller', 'Crime', 'Drama'], ['Comedy', 'Drama', 'Mystery'], ['Drama'], ['Thriller'], ['Drama'], ['Horror', 'Thriller'], ['Drama', 'Thriller'], ['Comedy', 'Romance'], ['Drama', 'Romance'], ['Crime', 'Drama'], ['Comedy', 'Romance'], ['Drama', 'Romance', 'War'], ['Thriller'], ['Comedy'], ['Thriller', 'Mystery'], ['Horror'], ['Drama', 'Horror', 'Thriller'], ['Family', 'Animation'], ['Horror', 'Thriller'], ['Comedy', 'Romance'], ['Drama'], ['Drama'], ['Action', 'Crime', 'Drama'], ['Drama', 'Thriller', 'Horror'], ['Comedy'], ['Drama'], ['Drama'], ['History', 'Drama', 'War'], ['Action'], ['Action', 'Crime', 'Thriller'], ['Drama'], ['Mystery', 'Drama', 'Action', 'Crime'], ['Thriller'], ['Drama', 'Fantasy', 'Mystery', 'Romance'], ['Drama', 'Comedy'], ['Romance', 'Comedy'], ['Mystery', 'Thriller', 'Action'], ['Western', 'Drama'], ['Fantasy', 'Comedy', 'Drama'], ['Crime', 'Thriller'], ['Comedy'], ['War', 'Drama'], ['Horror', 'Thriller'], ['Comedy', 'Drama', 'Romance'], ['Drama', 'Action'], ['Family', 'Comedy'], ['Drama', 'Comedy'], ['Western', 'Drama'], ['Action', 'Crime', 'Drama', 'Thriller'], ['Drama', 'Thriller'], ['Thriller', 'Drama'], ['Animation', 'Comedy', 'Family', 'Adventure'], ['Thriller', 'Action', 'Drama'], ['Thriller', 'Action', 'Crime'], ['Action', 'Adventure'], ['Crime', 'Drama', 'Thriller'], ['Music', 'Drama'], ['Comedy', 'Romance'], ['Drama'], ['Drama', 'Comedy'], ['Comedy', 'Drama'], ['Drama'], ['Drama', 'Comedy'], ['Science Fiction', 'TV Movie'], ['Drama', 'Romance', 'Comedy'], ['Thriller', 'Science Fiction', 'Drama'], ['Comedy', 'Drama', 'Crime'], ['Adventure', 'Drama', 'Comedy'], ['Drama'], 
['Documentary'], ['Thriller', 'Animation', 'Action', 'Crime'], ['Comedy', 'Drama', 'Romance'], ['Animation'], ['Drama', 'Fantasy', 'Science Fiction'], ['Horror', 'Comedy', 'Thriller'], ['Thriller'], ['Drama'], ['Thriller', 'Drama', 'Action', 'Mystery'], ['Animation', 'Family'], ['Romance', 'Comedy', 'Animation', 'Drama', 'Family'], ['Drama'], ['Drama', 'Thriller'], ['Thriller'], ['Romance', 'Drama'], ['Drama'], ['Drama', 'Comedy', 'Romance'], ['Drama', 'Romance', 'Science Fiction'], ['Action'], ['Comedy'], ['Thriller', 'Drama'], ['Horror'], ['Action', 'Crime', 'Thriller'], ['Thriller'], ['Drama', 'Comedy'], ['Drama', 'Music'], ['Horror'], ['Thriller', 'Crime', 'Horror'], ['Drama'], ['Thriller', 'Crime', 'Drama', 'Mystery'], ['Drama'], ['Thriller', 'Science Fiction'], ['Comedy'], ['Comedy', 'Horror'], ['Thriller'], ['Horror'], ['Drama'], ['Drama', 'Comedy', 'Science Fiction'], ['Drama', 'Thriller'], ['Music', 'Documentary', 'Drama'], ['Family', 'Drama'], ['Comedy', 'Drama'], ['Action', 'Drama'], ['Drama', 'Comedy'], ['Thriller', 'Drama'], ['Thriller', 'Horror', 'Drama'], ['Drama'], ['Thriller', 'Horror'], ['Comedy', 'Adventure', 'Crime', 'Family'], ['Drama'], ['Horror', 'Fantasy', 'Thriller'], ['Comedy'], ['Thriller', 'Action'], ['Horror', 'Mystery', 'Thriller'], ['Comedy', 'Drama', 'Horror'], ['Drama', 'Comedy'], ['Drama'], ['Comedy'], ['Horror'], ['Comedy'], ['Thriller', 'Action', 'Science Fiction'], ['Action', 'Comedy', 'Thriller'], ['Drama'], ['Comedy', 'Horror'], ['Drama', 'Music'], ['Drama'], ['Drama'], ['Action', 'Crime', 'Thriller'], ['Drama', 'Mystery', 'Thriller'], ['Thriller', 'Drama', 'Science Fiction'], ['Thriller', 'Drama'], ['Family'], ['Comedy'], ['Adventure', 'Animation', 'Family', 'Fantasy'], ['Drama'], ['Documentary'], ['Drama', 'Romance'], ['Documentary', 'Family'], ['Horror'], ['Thriller', 'Horror'], ['Action', 'Adventure'], ['Drama'], ['Horror', 'Action', 'Comedy'], ['Horror', 'Thriller'], ['Thriller', 'Horror', 'Mystery'], ['Comedy', 'Drama', 
'Romance'], ['History', 'Drama'], ['Fantasy', 'Comedy'], ['Adventure'], ['Horror', 'Action'], ['Comedy'], ['Comedy', 'Horror'], ['Action', 'Fantasy', 'Comedy', 'Horror', 'Mystery'], ['Drama'], ['Romance', 'Comedy', 'Horror'], ['Thriller', 'Horror'], ['Romance', 'Action', 'Comedy'], ['Action', 'Drama', 'Romance', 'TV Movie'], ['Comedy'], ['Drama'], ['Drama', 'Action', 'Crime'], ['Horror', 'Thriller'], ['Crime', 'Comedy'], ['Horror'], ['Thriller', 'Horror', 'Science Fiction'], ['Drama'], ['Comedy', 'Romance', 'Music'], ['Thriller'], ['Drama'], ['Romance'], ['Action', 'Comedy', 'Crime', 'Thriller'], ['Comedy'], ['Horror'], ['Drama', 'Comedy'], ['Thriller', 'Action', 'Science Fiction'], ['Comedy', 'Drama', 'Romance'], ['Drama'], ['Thriller', 'Drama', 'Romance'], ['Family', 'Comedy'], ['Action', 'Thriller'], ['Action'], ['Crime', 'Thriller'], ['Thriller', 'Drama'], ['Drama', 'Comedy'], ['Drama'], ['Action', 'Crime', 'Thriller'], ['Horror', 'Romance'], ['Drama'], ['Horror', 'Thriller'], ['Animation', 'Family'], ['Drama', 'Thriller'], ['Drama'], ['Drama'], ['TV Movie', 'Romance', 'Family'], ['Comedy', 'Drama'], ['Horror'], ['Comedy', 'Romance'], ['Mystery', 'Horror', 'Thriller'], ['Comedy'], ['Animation', 'Action', 'Adventure'], ['Family', 'Adventure', 'Science Fiction'], ['Drama'], ['Adventure', 'Action'], ['Thriller', 'Horror', 'Mystery'], ['Fantasy', 'Adventure'], ['Comedy', 'Drama', 'Music'], ['Action', 'Drama', 'Thriller'], ['Action', 'Horror', 'Science Fiction'], ['Western', 'Action', 'Drama', 'Science Fiction'], ['Thriller', 'Drama'], ['TV Movie', 'Crime', 'Mystery', 'Thriller'], ['Crime', 'Drama', 'Family', 'Romance'], ['Comedy', 'Romance'], ['Family', 'Animation', 'Music'], ['Action', 'Thriller'], ['Horror'], ['Drama', 'Action', 'Adventure'], ['Comedy'], ['Thriller'], ['Comedy'], ['Thriller', 'Crime', 'Drama'], ['Action', 'Drama'], ['Adventure', 'Science Fiction', 'Action', 'Thriller'], ['Crime', 'Drama', 'Thriller'], ['Drama', 'Comedy'], ['Drama', 'Action', 
'Comedy', 'Crime'], ['Thriller', 'Drama'], ['Comedy'], ['Documentary'], ['Drama'], ['Comedy'], ['Horror', 'Science Fiction'], ['Horror'], ['Drama'], ['Horror'], ['Drama'], ['Documentary', 'Drama'], ['Thriller', 'Horror', 'Mystery'], ['Drama', 'Thriller', 'Action'], ['Comedy', 'Romance'], ['Science Fiction', 'Adventure', 'Comedy'], ['Science Fiction', 'Drama', 'Thriller'], ['Comedy', 'Drama'], ['Horror', 'Science Fiction'], ['Drama', 'Mystery', 'Thriller', 'TV Movie'], ['Drama', 'Crime', 'TV Movie'], ...]
Nous allons garder seulement la 1re valeur, c'est-à-dire le premier genre listé pour chaque film.
# Build a list holding, for every film, the text that precedes the first '|'
# in its `genres` string (i.e. the first genre listed).
list_genre = [genres.partition('|')[0] for genres in df_movies['genres']]
list_genre
['Action', 'Action', 'Adventure', 'Action', 'Action', 'Western', 'Science Fiction', 'Drama', 'Family', 'Comedy', 'Action', 'Science Fiction', 'Drama', 'Action', 'Action', 'Crime', 'Crime', 'Science Fiction', 'Romance', 'War', 'Action', 'Action', 'Action', 'Drama', 'Comedy', 'Action', 'Comedy', 'Crime', 'Drama', 'Action', 'Mystery', 'Crime', 'Comedy', 'Thriller', 'Adventure', 'Drama', 'Crime', 'Adventure', 'Action', 'Drama', 'Drama', 'Fantasy', 'History', 'Comedy', 'Fantasy', 'Action', 'Fantasy', 'Drama', 'Thriller', 'Thriller', 'Romance', 'Comedy', 'Adventure', 'Comedy', 'Action', 'Drama', 'Science Fiction', 'Comedy', 'Drama', 'Mystery', 'Action', 'Action', 'Mystery', 'Action', 'Romance', 'Action', 'Thriller', 'Crime', 'Drama', 'Comedy', 'Drama', 'Crime', 'Thriller', 'Adventure', 'Action', 'Comedy', 'Adventure', 'Drama', 'Comedy', 'Drama', 'Romance', 'Animation', 'Family', 'Adventure', 'Action', 'Adventure', 'Horror', 'Horror', 'Drama', 'Crime', 'Thriller', 'Comedy', 'Fantasy', 'Thriller', 'Adventure', 'Adventure', 'Drama', 'Romance', 'Horror', 'Action', 'Adventure', 'Action', 'Thriller', 'Thriller', 'Comedy', 'Comedy', 'Comedy', 'Horror', 'Crime', 'Crime', 'Horror', 'Drama', 'Action', 'Crime', 'Comedy', 'Drama', 'Action', 'Comedy', 'Science Fiction', 'Comedy', 'Thriller', 'Mystery', 'Thriller', 'Comedy', 'Horror', 'Romance', 'Drama', 'Crime', 'Comedy', 'Family', 'Crime', 'Adventure', 'Family', 'Drama', 'Comedy', 'Action', 'Horror', 'Action', 'Drama', 'Drama', 'Action', 'Comedy', 'Drama', 'Adventure', 'Animation', 'Horror', 'Horror', 'Drama', 'Drama', 'Thriller', 'Drama', 'Horror', 'Horror', 'Action', 'Romance', 'Mystery', 'Drama', 'Comedy', 'Comedy', 'Action', 'Drama', 'Crime', 'Drama', 'Comedy', 'Drama', 'Comedy', 'Comedy', 'History', 'Drama', 'Drama', 'Drama', 'Adventure', 'History', 'Action', 'Comedy', 'Music', 'Music', 'Romance', 'Drama', 'Drama', 'Comedy', 'Action', 'Drama', 'Adventure', 'Romance', 'Action', 'Drama', 'Drama', 'Thriller', 'Drama', 'Animation', 
'Thriller', 'Thriller', 'Comedy', 'Western', 'Documentary', 'Drama', 'Fantasy', 'Family', 'Horror', 'Romance', 'Romance', 'Drama', 'Drama', 'Thriller', 'Comedy', 'Comedy', 'Drama', 'Comedy', 'Thriller', 'Drama', 'Comedy', 'Romance', 'Drama', 'Drama', 'Mystery', 'Animation', 'Thriller', 'Action', 'Drama', 'Drama', 'Drama', 'Mystery', 'Drama', 'Horror', 'Thriller', 'Action', 'Thriller', 'Adventure', 'Comedy', 'Fantasy', 'Music', 'War', 'Thriller', 'Drama', 'Drama', 'Horror', 'Drama', 'Drama', 'Animation', 'Thriller', 'Documentary', 'Horror', 'Drama', 'Horror', 'History', 'Drama', 'Drama', 'Horror', 'Thriller', 'Action', 'Drama', 'Crime', 'Drama', 'Thriller', 'Fantasy', 'Adventure', 'Action', 'Action', 'TV Movie', 'Action', 'Drama', 'Horror', 'Horror', 'Drama', 'Comedy', 'Comedy', 'Music', 'Comedy', 'Music', 'Drama', 'Horror', 'Comedy', 'Action', 'Drama', 'Action', 'Fantasy', 'Action', 'Thriller', 'Animation', 'Drama', 'Thriller', 'Horror', 'Thriller', 'Comedy', 'Thriller', 'Family', 'Drama', 'Thriller', 'Action', 'Documentary', 'Drama', 'Horror', 'Drama', 'Thriller', 'Family', 'Thriller', 'Thriller', 'Crime', 'Thriller', 'Drama', 'Thriller', 'Drama', 'Fantasy', 'Drama', 'Family', 'Thriller', 'Science Fiction', 'Drama', 'Thriller', 'Horror', 'Adventure', 'Thriller', 'Horror', 'Horror', 'Thriller', 'Horror', 'Animation', 'Science Fiction', 'Horror', 'Thriller', 'Comedy', 'Drama', 'Thriller', 'TV Movie', 'Horror', 'TV Movie', 'Drama', 'Science Fiction', 'Thriller', 'Comedy', 'Drama', 'Drama', 'Crime', 'Documentary', 'Documentary', 'Thriller', 'War', 'Adventure', 'Animation', 'Drama', 'Animation', 'Comedy', 'Animation', 'Drama', 'Drama', 'Documentary', 'Drama', 'Thriller', 'Science Fiction', 'Crime', 'Science Fiction', 'Action', 'Comedy', 'Science Fiction', 'Thriller', 'Action', 'Action', 'Drama', 'Science Fiction', 'Action', 'Drama', 'Drama', 'Thriller', 'Drama', 'Drama', 'Horror', 'Thriller', 'Drama', 'Family', 'Adventure', 'Animation', 'Romance', 'Romance', 'Mystery', 
'TV Movie', 'Action', 'Documentary', 'Drama', 'Drama', 'Drama', 'Horror', 'Drama', 'Science Fiction', 'Romance', 'Horror', 'Drama', 'Drama', 'Drama', 'Drama', 'Drama', 'Drama', 'Horror', 'Comedy', 'Romance', 'Horror', 'History', 'Horror', 'Horror', 'Horror', 'Horror', 'Drama', 'Science Fiction', 'Drama', 'Music', 'Horror', 'Drama', 'Thriller', 'Comedy', 'Drama', 'Horror', 'Horror', 'Thriller', 'Romance', 'Romance', 'History', 'Horror', 'Action', 'Drama', 'Action', 'Documentary', 'Romance', 'Fantasy', 'TV Movie', 'Comedy', 'History', 'Thriller', 'Comedy', 'Drama', 'Comedy', 'Crime', 'Science Fiction', 'Drama', 'Comedy', 'Comedy', 'Documentary', 'Drama', 'Drama', 'Comedy', 'Documentary', 'Documentary', 'Mystery', 'Drama', 'Thriller', 'Horror', 'Horror', 'Comedy', 'Drama', 'Horror', 'Romance', 'Horror', 'Horror', 'Drama', 'Horror', 'Documentary', 'Science Fiction', 'Thriller', 'Drama', 'Documentary', 'Documentary', 'Documentary', 'Comedy', 'Mystery', 'Drama', 'Documentary', 'Science Fiction', 'Mystery', 'Crime', 'Comedy', 'Documentary', 'Thriller', 'Drama', 'Action', 'Comedy', 'Comedy', 'Drama', 'Drama', 'Thriller', 'Action', 'Action', 'Thriller', 'Horror', 'Action', 'Crime', 'Comedy', 'TV Movie', 'Thriller', 'Drama', 'Drama', 'Comedy', 'Horror', 'Drama', 'Drama', 'Drama', 'Comedy', 'Documentary', 'Romance', 'Thriller', 'Thriller', 'Comedy', 'Drama', 'Romance', 'Drama', 'Music', 'Action', 'Horror', 'Drama', 'Horror', 'Science Fiction', 'Action', 'Romance', 'Documentary', 'Drama', 'Documentary', 'Drama', 'Horror', 'Comedy', 'Thriller', 'Drama', 'Action', 'Romance', 'Drama', 'Documentary', 'Drama', 'Fantasy', 'Romance', 'Documentary', 'Romance', 'Horror', 'Drama', 'Mystery', 'Comedy', 'Thriller', 'Drama', 'Documentary', 'Documentary', 'Thriller', 'Documentary', 'Music', 'Mystery', 'Comedy', 'Horror', 'Comedy', 'Drama', 'Documentary', 'Horror', 'Drama', 'Documentary', 'Thriller', 'Thriller', 'Documentary', 'Horror', 'Drama', 'Documentary', 'Thriller', 'Comedy', 'Family', 
'Horror', 'Thriller', 'Thriller', 'Comedy', 'Mystery', 'Horror', 'Music', 'Horror', 'Animation', 'Comedy', 'Crime', 'Drama', 'Drama', 'Thriller', 'Comedy', 'Comedy', 'Documentary', 'Action', 'Comedy', 'Drama', 'Documentary', 'Drama', 'Drama', 'Romance', 'Thriller', 'Drama', 'Horror', 'Drama', 'Comedy', 'Documentary', 'Thriller', 'Documentary', 'Drama', 'Thriller', 'Documentary', 'Documentary', 'Romance', 'Horror', 'Documentary', 'Action', 'Music', 'Drama', 'Science Fiction', 'Drama', 'Comedy', 'Comedy', 'Thriller', 'Science Fiction', 'Comedy', 'Comedy', 'Family', 'Documentary', 'Thriller', 'Animation', 'Documentary', 'Drama', 'Drama', 'Thriller', 'Drama', 'Horror', 'Drama', 'Science Fiction', 'Horror', 'Documentary', 'Documentary', 'Comedy', 'Comedy', 'Documentary', 'Documentary', 'Documentary', 'Adventure', 'Action', 'Action', 'Action', 'Science Fiction', 'Adventure', 'Adventure', 'History', 'Action', 'Drama', 'Mystery', 'War', 'Adventure', 'Action', 'Action', 'Action', 'Science Fiction', 'Comedy', 'Crime', 'Science Fiction', 'Comedy', 'Drama', 'Science Fiction', 'Thriller', 'Comedy', 'Action', 'Action', 'Action', 'War', 'Adventure', 'Action', 'Fantasy', 'Crime', 'Horror', 'Fantasy', 'Thriller', 'Action', 'Adventure', 'Thriller', 'Adventure', 'Drama', 'Comedy', 'Horror', 'Family', 'Fantasy', 'Action', 'Drama', 'Crime', 'Crime', 'Comedy', 'Drama', 'Comedy', 'Horror', 'Thriller', 'Drama', 'Action', 'Action', 'Drama', 'Drama', 'Action', 'Comedy', 'Comedy', 'Drama', 'Romance', 'Comedy', 'Romance', 'Science Fiction', 'Action', 'Thriller', 'Comedy', 'Action', 'Action', 'Science Fiction', 'Thriller', 'Action', 'Drama', 'Science Fiction', 'Thriller', 'Comedy', 'Animation', 'Comedy', 'Crime', 'Adventure', 'Animation', 'Comedy', 'Mystery', 'War', 'Drama', 'History', 'Drama', 'Thriller', 'Animation', 'Drama', 'Action', 'Drama', 'Music', 'Horror', 'Romance', 'Comedy', 'Action', 'Romance', 'Thriller', 'Drama', 'Animation', 'Drama', 'Thriller', 'Adventure', 'Family', 'Comedy', 
'Thriller', 'Thriller', 'Action', 'Action', 'Action', 'Animation', 'Thriller', 'Action', 'Comedy', 'Thriller', 'Comedy', 'Drama', 'Thriller', 'Drama', 'Horror', 'Drama', 'Comedy', 'Drama', 'Crime', 'Comedy', 'Drama', 'Thriller', 'Comedy', 'Thriller', 'Horror', 'Drama', 'Family', 'Horror', 'Comedy', 'Drama', 'Drama', 'Action', 'Drama', 'Comedy', 'Drama', 'Drama', 'History', 'Action', 'Action', 'Drama', 'Mystery', 'Thriller', 'Drama', 'Drama', 'Romance', 'Mystery', 'Western', 'Fantasy', 'Crime', 'Comedy', 'War', 'Horror', 'Comedy', 'Drama', 'Family', 'Drama', 'Western', 'Action', 'Drama', 'Thriller', 'Animation', 'Thriller', 'Thriller', 'Action', 'Crime', 'Music', 'Comedy', 'Drama', 'Drama', 'Comedy', 'Drama', 'Drama', 'Science Fiction', 'Drama', 'Thriller', 'Comedy', 'Adventure', 'Drama', 'Documentary', 'Thriller', 'Comedy', 'Animation', 'Drama', 'Horror', 'Thriller', 'Drama', 'Thriller', 'Animation', 'Romance', 'Drama', 'Drama', 'Thriller', 'Romance', 'Drama', 'Drama', 'Drama', 'Action', 'Comedy', 'Thriller', 'Horror', 'Action', 'Thriller', 'Drama', 'Drama', 'Horror', 'Thriller', 'Drama', 'Thriller', 'Drama', 'Thriller', 'Comedy', 'Comedy', 'Thriller', 'Horror', 'Drama', 'Drama', 'Drama', 'Music', 'Family', 'Comedy', 'Action', 'Drama', 'Thriller', 'Thriller', 'Drama', 'Thriller', 'Comedy', 'Drama', 'Horror', 'Comedy', 'Thriller', 'Horror', 'Comedy', 'Drama', 'Drama', 'Comedy', 'Horror', 'Comedy', 'Thriller', 'Action', 'Drama', 'Comedy', 'Drama', 'Drama', 'Drama', 'Action', 'Drama', 'Thriller', 'Thriller', 'Family', 'Comedy', 'Adventure', 'Drama', 'Documentary', 'Drama', 'Documentary', 'Horror', 'Thriller', 'Action', 'Drama', 'Horror', 'Horror', 'Thriller', 'Comedy', 'History', 'Fantasy', 'Adventure', 'Horror', 'Comedy', 'Comedy', 'Action', 'Drama', 'Romance', 'Thriller', 'Romance', 'Action', 'Comedy', 'Drama', 'Drama', 'Horror', 'Crime', 'Horror', 'Thriller', 'Drama', 'Comedy', 'Thriller', 'Drama', 'Romance', 'Action', 'Comedy', 'Horror', 'Drama', 'Thriller', 
'Comedy', 'Drama', 'Thriller', 'Family', 'Action', 'Action', 'Crime', 'Thriller', 'Drama', 'Drama', 'Action', 'Horror', 'Drama', 'Horror', 'Animation', 'Drama', 'Drama', 'Drama', 'TV Movie', 'Comedy', 'Horror', 'Comedy', 'Mystery', 'Comedy', 'Animation', 'Family', 'Drama', 'Adventure', 'Thriller', 'Fantasy', 'Comedy', 'Action', 'Action', 'Western', 'Thriller', 'TV Movie', 'Crime', 'Comedy', 'Family', 'Action', 'Horror', 'Drama', 'Comedy', 'Thriller', 'Comedy', 'Thriller', 'Action', 'Adventure', 'Crime', 'Drama', 'Drama', 'Thriller', 'Comedy', 'Documentary', 'Drama', 'Comedy', 'Horror', 'Horror', 'Drama', 'Horror', 'Drama', 'Documentary', 'Thriller', 'Drama', 'Comedy', 'Science Fiction', 'Science Fiction', 'Comedy', 'Horror', 'Drama', 'Drama', ...]
"genres"
en "genres_adj"
# Derive a single-genre column: `genres` holds several genres separated by
# '|'; only the first one is kept and stored in the new `genres_adj` column.
# (A stray heading-anchor glyph that had been fused onto the first statement
# was removed; it made the line invalid Python.)
list_genre = [genres.split('|')[0] for genres in df_movies['genres']]
df_movies['genres_adj'] = list_genre
df_movies.head()
popularity | original_title | genres | vote_count | vote_average | release_year | budget_adj | revenue_adj | genres_adj | |
---|---|---|---|---|---|---|---|---|---|
0 | 32.985763 | Jurassic World | Action|Adventure|Science Fiction|Thriller | 5562 | 6.5 | 2015 | 137999939.3 | 1.392446e+09 | Action |
1 | 28.419936 | Mad Max: Fury Road | Action|Adventure|Science Fiction|Thriller | 6185 | 7.1 | 2015 | 137999939.3 | 3.481613e+08 | Action |
2 | 13.112507 | Insurgent | Adventure|Science Fiction|Thriller | 2480 | 6.3 | 2015 | 101199955.5 | 2.716190e+08 | Adventure |
3 | 11.173104 | Star Wars: The Force Awakens | Action|Adventure|Science Fiction|Fantasy | 5292 | 7.5 | 2015 | 183999919.0 | 1.902723e+09 | Action |
4 | 9.335014 | Furious 7 | Action|Crime|Thriller | 2947 | 7.3 | 2015 | 174799923.1 | 1.385749e+09 | Action |
Les Variables numériques
Analyse univariée (Univariate analysis)
# Mean of the `revenue_adj` column.
df_movies['revenue_adj'].mean()
51477974.92242079
# Mean of the `budget_adj` column.
df_movies['budget_adj'].mean()
17587121.43827647
# Distinct values taken by `vote_average`, in order of first appearance.
df_movies['vote_average'].unique()
array([6.5, 7.1, 6.3, 7.5, 7.3, 7.2, 5.8, 7.6, 8. , 6.2, 5.2, 7.4, 6.1, 7. , 6.8, 5.3, 7.8, 6.4, 6.6, 7.7, 5.6, 6.9, 5.9, 6.7, 5.5, 5. , 4.4, 5.4, 5.1, 4.8, 5.7, 4.1, 3.9, 4.5, 6. , 4.2, 3.6, 4.3, 4.9, 4.7, 4. , 3.5, 3.8, 3.3, 3.7, 4.6, 7.9, 8.2, 2.6, 3.1, 8.9, 3.2, 2.4, 8.4, 3. , 2.8, 3.4, 8.8, 8.1, 8.3, 2.7, 2.5, 2.1, 8.6, 2.9, 8.5, 9.2, 2.2, 2. , 8.7, 2.3, 1.5])
# Mean of the `vote_average` column.
df_movies['vote_average'].mean()
5.974063825862387
# Lowest `vote_average` in the dataset.
df_movies['vote_average'].min()
1.5
# Highest `vote_average` in the dataset.
df_movies['vote_average'].max()
9.2
# Distinct primary genres present in `genres_adj`, in order of first appearance.
df_movies['genres_adj'].unique()
array(['Action', 'Adventure', 'Western', 'Science Fiction', 'Drama', 'Family', 'Comedy', 'Crime', 'Romance', 'War', 'Mystery', 'Thriller', 'Fantasy', 'History', 'Animation', 'Horror', 'Music', 'Documentary', 'TV Movie', 'Foreign'], dtype=object)
# Number of rows per primary genre, sorted from most to least frequent.
df_movies['genres_adj'].value_counts()
Drama 2453 Comedy 2319 Action 1590 Horror 915 Adventure 586 Thriller 491 Documentary 432 Animation 403 Crime 380 Fantasy 272 Science Fiction 214 Romance 186 Family 144 Mystery 125 Music 100 TV Movie 78 War 59 History 44 Western 42 Foreign 9 Name: genres_adj, dtype: int64
# Distribution of the average vote.
df_movies['vote_average'].plot(kind='hist');
# Distribution of `revenue_adj`.
df_movies['revenue_adj'].plot(kind='hist');
# Pairwise correlations between the numeric columns. The frame also holds text
# columns (original_title, genres, genres_adj); recent pandas versions raise
# when corr() meets them, so the numeric columns are selected explicitly.
df_movies.select_dtypes(include='number').corr()
popularity | vote_count | vote_average | release_year | budget_adj | revenue_adj | |
---|---|---|---|---|---|---|
popularity | 1.000000 | 0.800786 | 0.210432 | 0.090200 | 0.513311 | 0.608964 |
vote_count | 0.800786 | 1.000000 | 0.254698 | 0.108275 | 0.586923 | 0.707865 |
vote_average | 0.210432 | 0.254698 | 1.000000 | -0.119851 | 0.093811 | 0.193836 |
release_year | 0.090200 | 0.108275 | -0.119851 | 1.000000 | 0.017076 | -0.066118 |
budget_adj | 0.513311 | 0.586923 | 0.093811 | 0.017076 | 1.000000 | 0.646507 |
revenue_adj | 0.608964 | 0.707865 | 0.193836 | -0.066118 | 0.646507 | 1.000000 |
# Bivariate analysis
# Correlation matrix of the numeric columns only: the text columns
# (original_title, genres, genres_adj) make a bare corr() raise on recent
# pandas versions, so they are filtered out explicitly.
corr = df_movies.select_dtypes(include='number').corr()

# Annotated heatmap with explicit tick labels.
fig, ax = plt.subplots(figsize=(10,8))
sns.heatmap(corr, xticklabels=corr.columns, yticklabels=corr.columns, annot=True, cmap='RdYlGn');

# Same heatmap on a fresh figure, relying on seaborn's default tick labels;
# the matrix computed above is reused instead of being recomputed.
plt.figure(figsize=(10,8))
sns.heatmap(corr, annot=True, cmap='RdYlGn');  # alternative palette: cmap='viridis'
# List the column labels currently present in df_movies.
df_movies.columns
Index(['popularity', 'original_title', 'genres', 'vote_count', 'vote_average', 'release_year', 'budget_adj', 'revenue_adj', 'genres_adj'], dtype='object')
# Un-annotated heatmap of the correlation matrix, drawn on a dedicated
# 8x6 figure with seaborn's default palette.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr, ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x7f8cce14a450>
# Scatter plot of `revenue_adj` against `budget_adj`.
df_movies.plot.scatter(x='budget_adj', y='revenue_adj', figsize=(10, 6));
Il y a une corrélation positive entre les revenus et le budget (coefficient ≈ 0,65).
# Scatter plot of `revenue_adj` against the number of votes.
df_movies.plot(kind='scatter', x='vote_count', y='revenue_adj', figsize=(10, 6));
# Scatter plot of `revenue_adj` against the average vote.
# DataFrame.plot(..., figsize=...) opens its own figure, so no plt.figure()
# call precedes it: that call only left an empty "Figure ... with 0 Axes".
df_movies.plot(kind='scatter', x='vote_average', y='revenue_adj', figsize=(10, 6));
<Figure size 720x432 with 0 Axes>
Les films dont la note moyenne est d'au moins 7 ont un très bon revenu.
# Mean `revenue_adj` for each primary genre (`genres_adj`).
df_movies.groupby(['genres_adj'])['revenue_adj'].mean()
genres_adj Action 7.401006e+07 Adventure 1.665356e+08 Animation 7.671443e+07 Comedy 3.837766e+07 Crime 4.686688e+07 Documentary 2.150125e+06 Drama 3.578622e+07 Family 7.724863e+07 Fantasy 8.283760e+07 Foreign 0.000000e+00 History 6.536195e+07 Horror 2.417979e+07 Music 3.807907e+07 Mystery 3.002289e+07 Romance 4.721514e+07 Science Fiction 9.951810e+07 TV Movie 7.485782e+05 Thriller 3.096924e+07 War 4.874310e+07 Western 4.730739e+07 Name: revenue_adj, dtype: float64
# Mean `revenue_adj` per primary genre, kept in a Series and shown as a bar chart.
df_genre_rev = df_movies.groupby('genres_adj')['revenue_adj'].mean()
df_genre_rev.plot.bar(figsize=(12, 8))
<matplotlib.axes._subplots.AxesSubplot at 0x7f8ccdf75e10>
# Mean `budget_adj` per primary genre.
df_genre_bud = df_movies.groupby(['genres_adj'])['budget_adj'].mean()
# Bar chart of the mean budget per genre.
df_genre_bud.plot(kind='bar', figsize=(12, 8));
# Pie chart of the same Series. `y` only selects a column when plotting a
# DataFrame; df_genre_bud is a Series and has no 'mass' column, so the
# leftover y='mass' argument was dropped.
df_genre_bud.plot.pie(figsize=(15, 10));
# Scatter plot of `revenue_adj` against the average vote, on a larger canvas.
df_movies.plot(kind='scatter', x='vote_average', y='revenue_adj', figsize=(12, 8))
<matplotlib.axes._subplots.AxesSubplot at 0x7f8ccded1b10>
# Scatter plot of `revenue_adj` against `budget_adj`, on a 12x8 canvas.
df_movies.plot.scatter(x='budget_adj', y='revenue_adj', figsize=(12, 8))
<matplotlib.axes._subplots.AxesSubplot at 0x7f8ccddea390>
# Bar chart of each numeric column's correlation with `vote_average`, sorted
# ascending; the trivial self-correlation entry is dropped.
# Only numeric columns are passed to corr(): the text columns
# (original_title, genres, genres_adj) make a bare corr() raise on recent
# pandas versions.
plt.figure(figsize=(10,8))
vote_corr = df_movies.select_dtypes(include='number').corr()['vote_average']
vote_corr.sort_values().drop('vote_average').plot(kind='bar')
<matplotlib.axes._subplots.AxesSubplot at 0x7f8cce056a90>