From a59f2599bd55ed0e35a206e9bb09d0d6ff7414c6 Mon Sep 17 00:00:00 2001
From: Lukas Winkler
Date: Sat, 4 Jul 2020 12:20:02 +0200
Subject: [PATCH] initial commit

---
Note: README.md and spotifystats.py were revised after this patch was
exported, so the blob ids on their "index" lines are stale.

 .gitignore      |  1 +
 README.md       | 26 +++++++++++++
 spotifystats.py | 99 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 126 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 spotifystats.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9f11b75
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.idea/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e809073
--- /dev/null
+++ b/README.md
@@ -0,0 +1,26 @@
+# Spotify Stats
+
+A very simple Python script to analyse minutes listened per day and most played songs.
+
+### How-To
+
+- Request your account data from https://www.spotify.com/us/account/privacy/
+- confirm the e-mail
+- up to 30 days later you will get a zip file with a `MyData` folder containing files similar to these:
+    - Follow.json
+    - Payments.json
+    - Playlist1.json
+    - Read_Me_First.pdf
+    - SearchQueries.json
+    - StreamingHistory0.json
+    - StreamingHistory1.json
+    - Userdata.json
+    - YourLibrary.json
+- download the `spotifystats.py` file
+- edit the `BASEDIR` path to your `MyData` folder
+- run `spotifystats.py`
+
+### Dependencies
+
+- python 3.7 or newer
+- matplotlib
diff --git a/spotifystats.py b/spotifystats.py
new file mode 100644
index 0000000..746c448
--- /dev/null
+++ b/spotifystats.py
@@ -0,0 +1,99 @@
+"""Analyse a Spotify data export: minutes listened per day and most played songs."""
+import json
+from collections import Counter, defaultdict
+from dataclasses import dataclass
+from datetime import datetime
+from pathlib import Path
+
+from matplotlib import pyplot as plt
+
+# minimum time a song needs to be played to count as "played"
+MIN_PLAY_TIME = 60  # seconds
+
+# only show songs that were played at least this often in the top list
+MIN_PLAY_NUM = 3
+
+# path to Spotify export
+BASEDIR = Path("/home/lukas/Nextcloud/MyData/")
+
+
+@dataclass
+class Song:
+    """One play taken from a ``StreamingHistory*.json`` file."""
+
+    end_time: str
+    artist_name: str
+    track_name: str
+    ms_played: int
+
+    def datetime(self) -> datetime:
+        """Parse ``end_time`` (``YYYY-MM-DD HH:MM``) into a datetime object."""
+        return datetime.strptime(self.end_time, "%Y-%m-%d %H:%M")
+
+    def date(self):
+        """Return the calendar date of ``end_time``."""
+        return self.datetime().date()
+
+    @property
+    def min_played(self) -> float:
+        """Play time converted from milliseconds to minutes."""
+        return self.ms_played / 1000 / 60
+
+
+def load_history(basedir):
+    """Return a list of Song objects read from all history files in *basedir*."""
+    history = []
+    for file in basedir.glob("StreamingHistory*.json"):
+        # JSON is normally UTF-8; the platform default encoding (e.g. cp1252 on
+        # Windows) would garble or reject non-ASCII artist and track names
+        with file.open(encoding="utf-8") as f:
+            data = json.load(f)
+        for song in data:
+            try:
+                song_obj = Song(end_time=song["endTime"], artist_name=song["artistName"],
+                                track_name=song["trackName"], ms_played=song["msPlayed"])
+            except KeyError:
+                print("this entry doesn't seem to be a song")
+                continue
+            history.append(song_obj)
+    return history
+
+
+def minutes_per_day(history):
+    """Return a dict mapping each date to the total minutes played on it."""
+    bins = defaultdict(float)
+    for song in history:
+        bins[song.date()] += song.min_played
+    return dict(bins)
+
+
+def top_songs(history):
+    """Return ``((artist, title), plays)`` pairs for the top list.
+
+    A play counts once it lasted at least MIN_PLAY_TIME seconds; songs with
+    fewer than MIN_PLAY_NUM counted plays are left out. The result is sorted
+    by play count (descending), then artist, then title.
+    """
+    counts = Counter((song.artist_name, song.track_name) for song in history
+                     if song.ms_played >= MIN_PLAY_TIME * 1000)
+    ranked = sorted(counts.items(), key=lambda c: (-c[1], c[0][0], c[0][1]))
+    return [entry for entry in ranked if entry[1] >= MIN_PLAY_NUM]
+
+
+def main():
+    """Print the play statistics and show the minutes-per-day bar chart."""
+    history = load_history(BASEDIR)
+    print(f"{len(history)} plays found")
+
+    bins = minutes_per_day(history)
+    plt.bar(list(bins.keys()), list(bins.values()))
+    plt.ylabel("minutes")
+
+    for (artist, title), num_played in top_songs(history):
+        print(f"{num_played:>4} {artist}: {title}")
+
+    plt.show()
+
+
+if __name__ == "__main__":
+    main()