5. Testing database operations

Database tests need a real database, not a mock. You're verifying that SQL behaves correctly, which mocking can't tell you. SQLite's :memory: mode gives you real query parsing, joins, constraints, and transactions, with no disk I/O and no file to clean up.

Why in-memory SQLite, not mocks

Mocking sqlite3 would test your Python code's structure, not its SQL semantics. On a real database, a bad column name, broken join, or missing constraint fails exactly the way it would fail in the app. Each test gets a fresh in-memory connection, so isolation is automatic.

The Music Time Machine now has two core Chapter 16 tables: tracks and snapshots. The track row also stores raw_json, which preserves the original Spotify payload for later JSON work. Every database test needs those tables present, and most need a few rows seeded for realism.

Two fixtures: in_memory_db and seeded_db

Add these to tests/conftest.py alongside the mock fixtures from the previous page:

tests/conftest.py (continued)

import pytest
import sqlite3


@pytest.fixture
def in_memory_db():
    """Empty in-memory SQLite with the supported Chapter 16 schema."""
    conn = sqlite3.connect(':memory:')
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()
    cursor.executescript("""
        CREATE TABLE tracks (
            track_id TEXT PRIMARY KEY,
            name TEXT NOT NULL,
            artist_name TEXT NOT NULL,
            album_name TEXT,
            duration_ms INTEGER,
            spotify_url TEXT,
            raw_json TEXT
        );
        CREATE TABLE snapshots (
            track_id TEXT NOT NULL,
            snapshot_date TEXT NOT NULL,
            time_range TEXT NOT NULL,
            rank INTEGER NOT NULL,
            PRIMARY KEY (track_id, snapshot_date, time_range),
            FOREIGN KEY (track_id) REFERENCES tracks(track_id)
        );
    """)
    conn.commit()
    yield conn
    conn.close()


@pytest.fixture
def seeded_db(in_memory_db):
    """In-memory db pre-populated with three months of snapshots."""
    conn = in_memory_db
    cursor = conn.cursor()

    tracks = [
        ('track_1', 'Karma Police', 'Radiohead', 'OK Computer', 261000, None, '{}'),
        ('track_2', 'Paranoid Android', 'Radiohead', 'OK Computer', 383000, None, '{}'),
        ('track_3', 'Bohemian Rhapsody', 'Queen', 'A Night at the Opera', 355000, None, '{}'),
        ('track_4', 'Yesterday', 'The Beatles', 'Help!', 125000, None, '{}'),
        ('track_5', 'Mr. Brightside', 'The Killers', 'Hot Fuss', 222000, None, '{}'),
    ]
    cursor.executemany(
        "INSERT INTO tracks VALUES (?, ?, ?, ?, ?, ?, ?)", tracks
    )

    snapshots = []
    for date, ids in [
        ('2026-01-15', ['track_1', 'track_2', 'track_3']),
        ('2026-02-15', ['track_1', 'track_3', 'track_4']),
        ('2026-03-15', ['track_1', 'track_4', 'track_5']),
    ]:
        for rank, tid in enumerate(ids, start=1):
            snapshots.append((tid, date, 'short_term', rank))
    cursor.executemany(
        """INSERT INTO snapshots
           (track_id, snapshot_date, time_range, rank)
           VALUES (?, ?, ?, ?)""",
        snapshots,
    )
    conn.commit()
    return conn

Two fixtures, two responsibilities. in_memory_db sets up the schema and yields the connection; the yield form runs cleanup after the test finishes. seeded_db composes that fixture and adds realistic snapshot history on top.

Test target: calculate_taste_stats

Chapter 18's calculate_taste_stats returns dashboard-card values. The interesting branch is the empty-state behaviour: when fewer than two snapshots exist, turnover_pct should be None rather than a misleading zero.

tests/test_database.py

from unittest.mock import patch

from app import calculate_taste_stats


def test_taste_stats_empty_db_returns_zero_tracks(in_memory_db):
    with patch('app.get_db_connection', return_value=in_memory_db):
        stats = calculate_taste_stats()

    assert stats['tracks_tracked'] == 0
    assert stats['turnover_pct'] is None
    assert stats['consistent_artist'] is None


def test_taste_stats_seeded_db_returns_track_count(seeded_db):
    with patch('app.get_db_connection', return_value=seeded_db):
        stats = calculate_taste_stats()

    assert stats['tracks_tracked'] == 5
    assert stats['turnover_pct'] is not None
    assert stats['consistent_artist'] == 'Radiohead'


def test_taste_stats_one_snapshot_returns_none_turnover(in_memory_db):
    cursor = in_memory_db.cursor()
    cursor.execute(
        "INSERT INTO tracks VALUES ('only', 'A Track', 'An Artist', 'An Album', 180000, NULL, '{}')"
    )
    cursor.execute(
        """INSERT INTO snapshots
           (track_id, snapshot_date, time_range, rank)
           VALUES ('only', '2026-03-15', 'short_term', 1)"""
    )
    in_memory_db.commit()

    with patch('app.get_db_connection', return_value=in_memory_db):
        stats = calculate_taste_stats()

    assert stats['tracks_tracked'] == 1
    assert stats['turnover_pct'] is None

Test target: get_taste_chart_data

The home dashboard chart now uses the supported snapshot schema only. It returns discovery counts: tracks whose first-ever snapshot appearance happened in each month.

tests/test_database.py (continued)

from app import get_taste_chart_data


def test_chart_data_returns_discovery_mode(seeded_db):
    with patch('app.get_db_connection', return_value=seeded_db):
        data = get_taste_chart_data()

    assert data['mode'] == 'discoveries'
    assert len(data['datasets']) == 1
    assert data['datasets'][0]['label'] == 'New tracks'
    assert data['labels'] == ['Jan 2026', 'Feb 2026', 'Mar 2026']


def test_chart_data_empty_db_returns_discovery_mode_with_no_points(in_memory_db):
    with patch('app.get_db_connection', return_value=in_memory_db):
        data = get_taste_chart_data()

    assert data['mode'] == 'discoveries'
    assert data['labels'] == []
    assert data['datasets'][0]['data'] == []

Test target: _get_database_status

Chapter 18's _get_database_status reads row counts for the Settings page. It should report the supported tables and tolerate an empty database.

tests/test_database.py (continued)

from app import _get_database_status


def test_status_seeded_db_reports_real_counts(seeded_db):
    with patch('app.get_db_connection', return_value=seeded_db):
        with patch('app.session', {'access_token': 'fake'}):
            status = _get_database_status()

    assert status['tracks_count'] == 5
    assert status['snapshots_count'] == 9
    assert status['is_connected'] is True


def test_status_empty_db_reports_zero_counts(in_memory_db):
    with patch('app.get_db_connection', return_value=in_memory_db):
        with patch('app.session', {}):
            status = _get_database_status()

    assert status['tracks_count'] == 0
    assert status['snapshots_count'] == 0
    assert status['is_connected'] is False

Test target: find_forgotten_gems

Chapter 16's find_forgotten_gems applies three date windows: tracks last seen between 90 and 365 days ago, excluding tracks seen in the last 30 days. The useful tests pin those boundaries with frozen dates.

tests/test_database.py (continued)

from freezegun import freeze_time

from forgotten_gems import find_forgotten_gems


@freeze_time("2026-06-01")
def test_forgotten_gems_finds_old_but_not_recent_tracks(in_memory_db):
    cursor = in_memory_db.cursor()
    cursor.execute(
        "INSERT INTO tracks VALUES ('gem', 'Old Favourite', 'Artist', 'Album', 180000, NULL, '{}')"
    )
    cursor.execute(
        "INSERT INTO tracks VALUES ('recent', 'Still Around', 'Artist', 'Album', 180000, NULL, '{}')"
    )
    cursor.executemany(
        """INSERT INTO snapshots
           (track_id, snapshot_date, time_range, rank)
           VALUES (?, ?, 'short_term', 1)""",
        [
            ('gem', '2026-01-15'),
            ('recent', '2026-01-15'),
            ('recent', '2026-05-20'),
        ],
    )
    in_memory_db.commit()

    gems = find_forgotten_gems(in_memory_db, limit=10)
    ids = {row['track_id'] for row in gems}

    assert ids == {'gem'}

These tests exercise real SQL while keeping the suite fast. That is the sweet spot: realistic enough to catch schema/query mistakes, isolated enough to run on every save.