Add January 2026 sales data import scripts
- process_daily_sales.py: Parse XLS sales reports and extract museum-by-museum data - january_2026_data.csv: Verified data for all 31 days, 4 museums (124 records) Data imported to NocoDB DailyStats table: - Qur'an Museum: 8,493 visits, 196,450 SAR - Revelation Museum: 68,449 visits, 1,637,290 SAR - Creation Story Museum: 32,926 visits, 697,225 SAR - The Best Of Creation: 15,110 visits, 263,783 SAR - Total: 124,978 visits, 2,794,748 SAR
This commit is contained in:
101
scripts/process_daily_sales.py
Normal file
101
scripts/process_daily_sales.py
Normal file
@@ -0,0 +1,101 @@
|
||||
import csv
|
||||
import os
|
||||
import re
|
||||
from collections import defaultdict
|
||||
|
||||
folder = '/tmp/jan2026_csv/'
|
||||
|
||||
# Museum name mapping - order matters! Check more specific patterns first
|
||||
def get_museum(ticket_name):
|
||||
# Some tickets have both "الوحي" and "القرآن" - prioritize based on actual content
|
||||
# Pattern: "معرض الوحي | متحف القرآن الكريم" = Revelation Exhibition ticket (القرآن is just venue)
|
||||
|
||||
ticket_lower = ticket_name.lower()
|
||||
|
||||
# Check Revelation first (الوحي or revelation) - this takes priority
|
||||
if 'الوحي' in ticket_name or 'revelation' in ticket_lower:
|
||||
return "Revelation Museum"
|
||||
|
||||
# Then Qur'an (only if الوحي not present)
|
||||
if 'holy qur' in ticket_lower or 'quraan' in ticket_lower or 'quran' in ticket_lower or 'القرآن' in ticket_name:
|
||||
return "Qur'an Museum"
|
||||
|
||||
# Best of Creation
|
||||
if 'best of creation' in ticket_lower or 'خير الخلق' in ticket_name:
|
||||
return "The Best Of Creation"
|
||||
|
||||
# Creation Story
|
||||
if 'creation story' in ticket_lower or 'قصة الخلق' in ticket_name:
|
||||
return "Creation Story Museum"
|
||||
|
||||
return None
|
||||
|
||||
results = []
|
||||
|
||||
for filename in os.listdir(folder):
|
||||
if not filename.endswith('.csv'):
|
||||
continue
|
||||
|
||||
filepath = os.path.join(folder, filename)
|
||||
|
||||
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
lines = f.readlines()
|
||||
|
||||
# Extract date from line 5
|
||||
date_match = re.search(r'(\d+/\d+/\d+)', lines[4] if len(lines) > 4 else '')
|
||||
if not date_match:
|
||||
continue
|
||||
date_str = date_match.group(1)
|
||||
|
||||
# Parse as m/d/yyyy and convert to yyyy-mm-dd
|
||||
parts = date_str.split('/')
|
||||
date_iso = f"{parts[2]}-{int(parts[0]):02d}-{int(parts[1]):02d}"
|
||||
|
||||
# Daily aggregation by museum
|
||||
daily = defaultdict(lambda: {'tickets': 0, 'visits': 0, 'revenue': 0.0})
|
||||
|
||||
# Parse CSV rows
|
||||
reader = csv.reader(lines)
|
||||
rows = list(reader)
|
||||
|
||||
for row in rows:
|
||||
if len(row) < 6:
|
||||
continue
|
||||
|
||||
# Check if this is a ticket row (has site name and ticket name)
|
||||
site_name = row[0].strip() if row[0] else ''
|
||||
if 'Hiraa' not in site_name and 'Safiyyah' not in site_name:
|
||||
continue
|
||||
|
||||
ticket_name = row[2].strip() if len(row) > 2 else ''
|
||||
museum = get_museum(ticket_name)
|
||||
if not museum:
|
||||
continue
|
||||
|
||||
try:
|
||||
quantity = int(row[3].replace(',', '')) if row[3].strip() else 0
|
||||
people = int(row[4].replace(',', '')) if row[4].strip() else 0
|
||||
gross_sale = float(row[5].replace(',', '')) if row[5].strip() else 0.0
|
||||
except (ValueError, IndexError):
|
||||
continue
|
||||
|
||||
daily[museum]['tickets'] += quantity
|
||||
daily[museum]['visits'] += people
|
||||
daily[museum]['revenue'] += gross_sale
|
||||
|
||||
for museum, data in daily.items():
|
||||
results.append({
|
||||
'date': date_iso,
|
||||
'museum': museum,
|
||||
'tickets': data['tickets'],
|
||||
'visits': data['visits'],
|
||||
'revenue': data['revenue']
|
||||
})
|
||||
|
||||
# Sort by date and museum
|
||||
results.sort(key=lambda x: (x['date'], x['museum']))
|
||||
|
||||
# Output as CSV
|
||||
print("date,museum,tickets,visits,revenue")
|
||||
for r in results:
|
||||
print(f"{r['date']},{r['museum']},{r['tickets']},{r['visits']},{r['revenue']:.2f}")
|
||||
Reference in New Issue
Block a user