mirror of
https://github.com/aladdinpersson/Machine-Learning-Collection.git
synced 2026-02-20 13:50:41 +00:00
459 lines
16 KiB
Plaintext
459 lines
16 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 207,
|
|
"id": "937dd4ed",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"\n",
"def load_data(data_dir=\"movielens_small\"):\n",
"    \"\"\"Read the MovieLens movies and ratings tables from CSV files.\n",
"\n",
"    Args:\n",
"        data_dir: Directory that holds ``movies.csv`` and ``ratings.csv``.\n",
"            Defaults to the ``movielens_small`` folder next to the notebook.\n",
"\n",
"    Returns:\n",
"        A ``(movies_df, ratings_df)`` tuple with one DataFrame per CSV file.\n",
"    \"\"\"\n",
"    # With the default directory these are exactly the two paths that used to be\n",
"    # hard-coded; forward slashes also work as separators on Windows.\n",
"    movies_df = pd.read_csv(f\"{data_dir}/movies.csv\")\n",
"    ratings_df = pd.read_csv(f\"{data_dir}/ratings.csv\")\n",
"    return movies_df, ratings_df\n",
"\n",
"def calculate_popularity(movies_df, ratings_df, damping_factor=5):\n",
"    \"\"\"Attach per-movie rating statistics, including a damped mean, to the movies.\n",
"\n",
"    The damped mean pulls each movie's average towards the global average:\n",
"\n",
"        (sum_of_ratings + damping_factor * global_mean) / (num_ratings + damping_factor)\n",
"\n",
"    A movie with few ratings therefore stays close to the global mean, and a\n",
"    movie with no ratings scores exactly the global mean when damping_factor > 0.\n",
"\n",
"    Args:\n",
"        movies_df: DataFrame with a ``movieId`` column. It is not modified.\n",
"        ratings_df: DataFrame with ``movieId`` and ``rating`` columns.\n",
"        damping_factor: Non-negative weight given to the global mean, expressed\n",
"            as a number of pseudo-ratings.\n",
"\n",
"    Returns:\n",
"        A new DataFrame equal to ``movies_df`` plus three columns:\n",
"        ``num_ratings`` (0 for unrated movies), ``mean_rating`` (NaN for\n",
"        unrated movies) and ``damped_mean_rating``.\n",
"\n",
"    Raises:\n",
"        ValueError: If ``damping_factor`` is negative.\n",
"    \"\"\"\n",
"    if damping_factor < 0:\n",
"        # A negative weight can cancel a movie's rating count and zero the denominator.\n",
"        raise ValueError(\"damping_factor must be non-negative\")\n",
"\n",
"    global_mean = ratings_df[\"rating\"].mean()\n",
"\n",
"    # One pass over the ratings yields every per-movie statistic we need.\n",
"    per_movie = ratings_df.groupby(\"movieId\")[\"rating\"].agg([\"count\", \"mean\", \"sum\"])\n",
"\n",
"    # Align the statistics with the movie table. Movies that nobody rated are\n",
"    # absent from per_movie, so their count and sum are filled with zero; their\n",
"    # plain mean is left as NaN because it is undefined.\n",
"    movie_ids = movies_df[\"movieId\"]\n",
"    num_ratings = movie_ids.map(per_movie[\"count\"]).fillna(0)\n",
"    mean_rating = movie_ids.map(per_movie[\"mean\"])\n",
"    rating_sum = movie_ids.map(per_movie[\"sum\"]).fillna(0)\n",
"\n",
"    damped_mean_rating = (rating_sum + damping_factor * global_mean) / (\n",
"        num_ratings + damping_factor\n",
"    )\n",
"\n",
"    # assign() returns a new frame, so the caller's movies_df keeps its columns.\n",
"    return movies_df.assign(\n",
"        num_ratings=num_ratings,\n",
"        mean_rating=mean_rating,\n",
"        damped_mean_rating=damped_mean_rating,\n",
"    )\n",
"\n",
"# Weight of the global mean in the damped rating, in pseudo-ratings.\n",
"DAMPING_FACTOR = 10\n",
"\n",
"# Keep the raw movie table under its own name so re-running this cell always\n",
"# starts from the data as loaded; later cells read the enriched movies_df.\n",
"movies_raw_df, ratings_df = load_data()\n",
"movies_df = calculate_popularity(movies_raw_df, ratings_df, damping_factor=DAMPING_FACTOR)"
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 198,
|
|
"id": "7e649c6f",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>movieId</th>\n",
|
|
" <th>title</th>\n",
|
|
" <th>genres</th>\n",
|
|
" <th>num_ratings</th>\n",
|
|
" <th>mean_rating</th>\n",
|
|
" <th>damped_mean_rating</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>314</th>\n",
|
|
" <td>356</td>\n",
|
|
" <td>Forrest Gump (1994)</td>\n",
|
|
" <td>Comedy|Drama|Romance|War</td>\n",
|
|
" <td>329.0</td>\n",
|
|
" <td>4.164134</td>\n",
|
|
" <td>4.144589</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>277</th>\n",
|
|
" <td>318</td>\n",
|
|
" <td>Shawshank Redemption, The (1994)</td>\n",
|
|
" <td>Crime|Drama</td>\n",
|
|
" <td>317.0</td>\n",
|
|
" <td>4.429022</td>\n",
|
|
" <td>4.400659</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>257</th>\n",
|
|
" <td>296</td>\n",
|
|
" <td>Pulp Fiction (1994)</td>\n",
|
|
" <td>Comedy|Crime|Drama|Thriller</td>\n",
|
|
" <td>307.0</td>\n",
|
|
" <td>4.197068</td>\n",
|
|
" <td>4.175128</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>510</th>\n",
|
|
" <td>593</td>\n",
|
|
" <td>Silence of the Lambs, The (1991)</td>\n",
|
|
" <td>Crime|Horror|Thriller</td>\n",
|
|
" <td>279.0</td>\n",
|
|
" <td>4.161290</td>\n",
|
|
" <td>4.138462</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1939</th>\n",
|
|
" <td>2571</td>\n",
|
|
" <td>Matrix, The (1999)</td>\n",
|
|
" <td>Action|Sci-Fi|Thriller</td>\n",
|
|
" <td>278.0</td>\n",
|
|
" <td>4.192446</td>\n",
|
|
" <td>4.168457</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" movieId title genres \n",
|
|
"314 356 Forrest Gump (1994) Comedy|Drama|Romance|War \\\n",
|
|
"277 318 Shawshank Redemption, The (1994) Crime|Drama \n",
|
|
"257 296 Pulp Fiction (1994) Comedy|Crime|Drama|Thriller \n",
|
|
"510 593 Silence of the Lambs, The (1991) Crime|Horror|Thriller \n",
|
|
"1939 2571 Matrix, The (1999) Action|Sci-Fi|Thriller \n",
|
|
"\n",
|
|
" num_ratings mean_rating damped_mean_rating \n",
|
|
"314 329.0 4.164134 4.144589 \n",
|
|
"277 317.0 4.429022 4.400659 \n",
|
|
"257 307.0 4.197068 4.175128 \n",
|
|
"510 279.0 4.161290 4.138462 \n",
|
|
"1939 278.0 4.192446 4.168457 "
|
|
]
|
|
},
|
|
"execution_count": 198,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
"# Five movies with the largest number of ratings.\n",
"movies_df.sort_values(\"num_ratings\", ascending=False).head(5)"
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 204,
|
|
"id": "c6ef332e",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>movieId</th>\n",
|
|
" <th>title</th>\n",
|
|
" <th>genres</th>\n",
|
|
" <th>num_ratings</th>\n",
|
|
" <th>mean_rating</th>\n",
|
|
" <th>damped_mean_rating</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>7656</th>\n",
|
|
" <td>88448</td>\n",
|
|
" <td>Paper Birds (Pájaros de papel) (2010)</td>\n",
|
|
" <td>Comedy|Drama</td>\n",
|
|
" <td>1.0</td>\n",
|
|
" <td>5.0</td>\n",
|
|
" <td>3.637779</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>8107</th>\n",
|
|
" <td>100556</td>\n",
|
|
" <td>Act of Killing, The (2012)</td>\n",
|
|
" <td>Documentary</td>\n",
|
|
" <td>1.0</td>\n",
|
|
" <td>5.0</td>\n",
|
|
" <td>3.637779</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>9083</th>\n",
|
|
" <td>143031</td>\n",
|
|
" <td>Jump In! (2007)</td>\n",
|
|
" <td>Comedy|Drama|Romance</td>\n",
|
|
" <td>1.0</td>\n",
|
|
" <td>5.0</td>\n",
|
|
" <td>3.637779</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>9094</th>\n",
|
|
" <td>143511</td>\n",
|
|
" <td>Human (2015)</td>\n",
|
|
" <td>Documentary</td>\n",
|
|
" <td>1.0</td>\n",
|
|
" <td>5.0</td>\n",
|
|
" <td>3.637779</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>9096</th>\n",
|
|
" <td>143559</td>\n",
|
|
" <td>L.A. Slasher (2015)</td>\n",
|
|
" <td>Comedy|Crime|Fantasy</td>\n",
|
|
" <td>1.0</td>\n",
|
|
" <td>5.0</td>\n",
|
|
" <td>3.637779</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" movieId title genres \n",
|
|
"7656 88448 Paper Birds (Pájaros de papel) (2010) Comedy|Drama \\\n",
|
|
"8107 100556 Act of Killing, The (2012) Documentary \n",
|
|
"9083 143031 Jump In! (2007) Comedy|Drama|Romance \n",
|
|
"9094 143511 Human (2015) Documentary \n",
|
|
"9096 143559 L.A. Slasher (2015) Comedy|Crime|Fantasy \n",
|
|
"\n",
|
|
" num_ratings mean_rating damped_mean_rating \n",
|
|
"7656 1.0 5.0 3.637779 \n",
|
|
"8107 1.0 5.0 3.637779 \n",
|
|
"9083 1.0 5.0 3.637779 \n",
|
|
"9094 1.0 5.0 3.637779 \n",
|
|
"9096 1.0 5.0 3.637779 "
|
|
]
|
|
},
|
|
"execution_count": 204,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
"# Five movies with the highest plain (undamped) mean rating.\n",
"movies_df.sort_values(\"mean_rating\", ascending=False).head()"
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 201,
|
|
"id": "f669fb09",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>movieId</th>\n",
|
|
" <th>title</th>\n",
|
|
" <th>genres</th>\n",
|
|
" <th>num_ratings</th>\n",
|
|
" <th>mean_rating</th>\n",
|
|
" <th>damped_mean_rating</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>277</th>\n",
|
|
" <td>318</td>\n",
|
|
" <td>Shawshank Redemption, The (1994)</td>\n",
|
|
" <td>Crime|Drama</td>\n",
|
|
" <td>317.0</td>\n",
|
|
" <td>4.429022</td>\n",
|
|
" <td>4.400659</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>659</th>\n",
|
|
" <td>858</td>\n",
|
|
" <td>Godfather, The (1972)</td>\n",
|
|
" <td>Crime|Drama</td>\n",
|
|
" <td>192.0</td>\n",
|
|
" <td>4.289062</td>\n",
|
|
" <td>4.250077</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2226</th>\n",
|
|
" <td>2959</td>\n",
|
|
" <td>Fight Club (1999)</td>\n",
|
|
" <td>Action|Crime|Drama|Thriller</td>\n",
|
|
" <td>218.0</td>\n",
|
|
" <td>4.272936</td>\n",
|
|
" <td>4.239103</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>922</th>\n",
|
|
" <td>1221</td>\n",
|
|
" <td>Godfather: Part II, The (1974)</td>\n",
|
|
" <td>Crime|Drama</td>\n",
|
|
" <td>129.0</td>\n",
|
|
" <td>4.259690</td>\n",
|
|
" <td>4.205148</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>46</th>\n",
|
|
" <td>50</td>\n",
|
|
" <td>Usual Suspects, The (1995)</td>\n",
|
|
" <td>Crime|Mystery|Thriller</td>\n",
|
|
" <td>204.0</td>\n",
|
|
" <td>4.237745</td>\n",
|
|
" <td>4.203344</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>224</th>\n",
|
|
" <td>260</td>\n",
|
|
" <td>Star Wars: Episode IV - A New Hope (1977)</td>\n",
|
|
" <td>Action|Adventure|Sci-Fi</td>\n",
|
|
" <td>251.0</td>\n",
|
|
" <td>4.231076</td>\n",
|
|
" <td>4.203125</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>602</th>\n",
|
|
" <td>750</td>\n",
|
|
" <td>Dr. Strangelove or: How I Learned to Stop Worr...</td>\n",
|
|
" <td>Comedy|War</td>\n",
|
|
" <td>97.0</td>\n",
|
|
" <td>4.268041</td>\n",
|
|
" <td>4.196407</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>914</th>\n",
|
|
" <td>1213</td>\n",
|
|
" <td>Goodfellas (1990)</td>\n",
|
|
" <td>Crime|Drama</td>\n",
|
|
" <td>126.0</td>\n",
|
|
" <td>4.250000</td>\n",
|
|
" <td>4.194967</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>461</th>\n",
|
|
" <td>527</td>\n",
|
|
" <td>Schindler's List (1993)</td>\n",
|
|
" <td>Drama|War</td>\n",
|
|
" <td>220.0</td>\n",
|
|
" <td>4.225000</td>\n",
|
|
" <td>4.193546</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6710</th>\n",
|
|
" <td>58559</td>\n",
|
|
" <td>Dark Knight, The (2008)</td>\n",
|
|
" <td>Action|Crime|Drama|IMAX</td>\n",
|
|
" <td>149.0</td>\n",
|
|
" <td>4.238255</td>\n",
|
|
" <td>4.191922</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" movieId title \n",
|
|
"277 318 Shawshank Redemption, The (1994) \\\n",
|
|
"659 858 Godfather, The (1972) \n",
|
|
"2226 2959 Fight Club (1999) \n",
|
|
"922 1221 Godfather: Part II, The (1974) \n",
|
|
"46 50 Usual Suspects, The (1995) \n",
|
|
"224 260 Star Wars: Episode IV - A New Hope (1977) \n",
|
|
"602 750 Dr. Strangelove or: How I Learned to Stop Worr... \n",
|
|
"914 1213 Goodfellas (1990) \n",
|
|
"461 527 Schindler's List (1993) \n",
|
|
"6710 58559 Dark Knight, The (2008) \n",
|
|
"\n",
|
|
" genres num_ratings mean_rating \n",
|
|
"277 Crime|Drama 317.0 4.429022 \\\n",
|
|
"659 Crime|Drama 192.0 4.289062 \n",
|
|
"2226 Action|Crime|Drama|Thriller 218.0 4.272936 \n",
|
|
"922 Crime|Drama 129.0 4.259690 \n",
|
|
"46 Crime|Mystery|Thriller 204.0 4.237745 \n",
|
|
"224 Action|Adventure|Sci-Fi 251.0 4.231076 \n",
|
|
"602 Comedy|War 97.0 4.268041 \n",
|
|
"914 Crime|Drama 126.0 4.250000 \n",
|
|
"461 Drama|War 220.0 4.225000 \n",
|
|
"6710 Action|Crime|Drama|IMAX 149.0 4.238255 \n",
|
|
"\n",
|
|
" damped_mean_rating \n",
|
|
"277 4.400659 \n",
|
|
"659 4.250077 \n",
|
|
"2226 4.239103 \n",
|
|
"922 4.205148 \n",
|
|
"46 4.203344 \n",
|
|
"224 4.203125 \n",
|
|
"602 4.196407 \n",
|
|
"914 4.194967 \n",
|
|
"461 4.193546 \n",
|
|
"6710 4.191922 "
|
|
]
|
|
},
|
|
"execution_count": 201,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
"# Ten movies with the highest damped mean rating.\n",
"movies_df.sort_values(\"damped_mean_rating\", ascending=False).head(n=10)"
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.16"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|