This document outlines a Python-based process to persist
youtube.com search queries and associated metadata into a local
SQLite database at:
C:\Temp\projectYouTubeAnalytics\DB\searches.db
searches.db.The script ensures the directory and database exist:
ROOT = r"C:\Temp\projectYouTubeAnalytics\DB"
DB_PATH = ROOT + r"\searches.db"
On first run, the script:
C:\Temp\projectYouTubeAnalytics\DB if missing.searches.db if missing.search_runsCREATE TABLE IF NOT EXISTS search_runs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
started_at TEXT NOT NULL,
finished_at TEXT,
source TEXT NOT NULL, -- e.g., 'api', 'browser_log'
query_count INTEGER DEFAULT 0
);
search_queriesCREATE TABLE IF NOT EXISTS search_queries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
run_id INTEGER NOT NULL REFERENCES search_runs(id),
query_text TEXT NOT NULL,
executed_at TEXT NOT NULL,
result_count INTEGER,
raw_payload TEXT, -- JSON from API or scraped HTML fragment
UNIQUE(run_id, query_text, executed_at)
);
search_results (optional, per-video granularity)CREATE TABLE IF NOT EXISTS search_results (
id INTEGER PRIMARY KEY AUTOINCREMENT,
query_id INTEGER NOT NULL REFERENCES search_queries(id),
video_id TEXT NOT NULL,
title TEXT,
channel TEXT,
published_at TEXT,
position INTEGER, -- rank in search results
raw_payload TEXT
);
python >= 3.10sqlite3 (standard library)requests (if using YouTube Data API or a proxy)python-dotenv for API keysC:\Temp\projectYouTubeAnalytics\DB exists.searches.db.search_runs (start timestamp, source).search_queries.search_results.search_runs.query_count and finished_at.import os
import sqlite3
from datetime import datetime
ROOT = r"C:\Temp\projectYouTubeAnalytics\DB"
DB_PATH = os.path.join(ROOT, "searches.db")
def ensure_db():
os.makedirs(ROOT, exist_ok=True)
conn = sqlite3.connect(DB_PATH)
cur = conn.cursor()
cur.executescript(
"""
CREATE TABLE IF NOT EXISTS search_runs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
started_at TEXT NOT NULL,
finished_at TEXT,
source TEXT NOT NULL,
query_count INTEGER DEFAULT 0
);
CREATE TABLE IF NOT EXISTS search_queries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
run_id INTEGER NOT NULL REFERENCES search_runs(id),
query_text TEXT NOT NULL,
executed_at TEXT NOT NULL,
result_count INTEGER,
raw_payload TEXT,
UNIQUE(run_id, query_text, executed_at)
);
"""
)
conn.commit()
return conn
def start_run(conn, source: str) -> int:
cur = conn.cursor()
now = datetime.utcnow().isoformat()
cur.execute(
"INSERT INTO search_runs (started_at, source) VALUES (?, ?)",
(now, source),
)
conn.commit()
return cur.lastrowid
def record_query(conn, run_id: int, query: str, result_count: int | None, raw_payload: str | None):
cur = conn.cursor()
ts = datetime.utcnow().isoformat()
cur.execute(
"""
INSERT OR IGNORE INTO search_queries
(run_id, query_text, executed_at, result_count, raw_payload)
VALUES (?, ?, ?, ?, ?)
""",
(run_id, query, ts, result_count, raw_payload),
)
conn.commit()
def finalize_run(conn, run_id: int):
cur = conn.cursor()
cur.execute(
"SELECT COUNT(*) FROM search_queries WHERE run_id = ?",
(run_id,),
)
(count,) = cur.fetchone()
now = datetime.utcnow().isoformat()
cur.execute(
"UPDATE search_runs SET finished_at = ?, query_count = ? WHERE id = ?",
(now, count, run_id),
)
conn.commit()
def main():
conn = ensure_db()
run_id = start_run(conn, source="api")
# Example: static list; in practice, drive this from config, CLI, or logs
queries = ["mit ai lab", "distributed systems lecture", "python asyncio tutorial"]
for q in queries:
# TODO: integrate YouTube Data API or local capture here
# result_count, raw_payload = fetch_youtube_search(q)
result_count, raw_payload = None, None
record_query(conn, run_id, q, result_count, raw_payload)
finalize_run(conn, run_id)
conn.close()
if __name__ == "__main__":
main()
youtube.com/results?search_query=....search_results table for per-video analytics.searches.db.