import csv import os import re from collections import defaultdict folder = '/tmp/jan2026_csv/' # Museum name mapping - order matters! Check more specific patterns first def get_museum(ticket_name): # Some tickets have both "الوحي" and "القرآن" - prioritize based on actual content # Pattern: "معرض الوحي | متحف القرآن الكريم" = Revelation Exhibition ticket (القرآن is just venue) ticket_lower = ticket_name.lower() # Check Revelation first (الوحي or revelation) - this takes priority if 'الوحي' in ticket_name or 'revelation' in ticket_lower: return "Revelation Museum" # Then Qur'an (only if الوحي not present) if 'holy qur' in ticket_lower or 'quraan' in ticket_lower or 'quran' in ticket_lower or 'القرآن' in ticket_name: return "Qur'an Museum" # Best of Creation if 'best of creation' in ticket_lower or 'خير الخلق' in ticket_name: return "The Best Of Creation" # Creation Story if 'creation story' in ticket_lower or 'قصة الخلق' in ticket_name: return "Creation Story Museum" return None results = [] for filename in os.listdir(folder): if not filename.endswith('.csv'): continue filepath = os.path.join(folder, filename) with open(filepath, 'r', encoding='utf-8', errors='ignore') as f: lines = f.readlines() # Extract date from line 5 date_match = re.search(r'(\d+/\d+/\d+)', lines[4] if len(lines) > 4 else '') if not date_match: continue date_str = date_match.group(1) # Parse as m/d/yyyy and convert to yyyy-mm-dd parts = date_str.split('/') date_iso = f"{parts[2]}-{int(parts[0]):02d}-{int(parts[1]):02d}" # Daily aggregation by museum daily = defaultdict(lambda: {'tickets': 0, 'visits': 0, 'revenue': 0.0}) # Parse CSV rows reader = csv.reader(lines) rows = list(reader) for row in rows: if len(row) < 6: continue # Check if this is a ticket row (has site name and ticket name) site_name = row[0].strip() if row[0] else '' if 'Hiraa' not in site_name and 'Safiyyah' not in site_name: continue ticket_name = row[2].strip() if len(row) > 2 else '' museum = get_museum(ticket_name) if not museum: continue try: quantity = int(row[3].replace(',', '')) if row[3].strip() else 0 people = int(row[4].replace(',', '')) if row[4].strip() else 0 gross_sale = float(row[5].replace(',', '')) if row[5].strip() else 0.0 except (ValueError, IndexError): continue daily[museum]['tickets'] += quantity daily[museum]['visits'] += people daily[museum]['revenue'] += gross_sale for museum, data in daily.items(): results.append({ 'date': date_iso, 'museum': museum, 'tickets': data['tickets'], 'visits': data['visits'], 'revenue': data['revenue'] }) # Sort by date and museum results.sort(key=lambda x: (x['date'], x['museum'])) # Output as CSV print("date,museum,tickets,visits,revenue") for r in results: print(f"{r['date']},{r['museum']},{r['tickets']},{r['visits']},{r['revenue']:.2f}")