# INSTALL: pip install requests pillow
"""Harvest set-symbol training crops from Scryfall card images.

For every core/expansion/masters/draft-innovation set returned by the
Scryfall API, the script downloads card images, crops the region where the
set symbol sits on a standard single-faced card, and writes the crops to
``OUTPUT_DIR/<SET_CODE>/sample_<n>.jpg``.
"""

import concurrent.futures
import os
import time
from io import BytesIO

import requests
from PIL import Image

# SETUP: Replace the email below with your real contact email.
# Scryfall requires an accurate User-Agent per their API policy.
# See: https://scryfall.com/docs/api
CONTACT_EMAIL = "support@iymtg.com"  # <-- UPDATE THIS

HEADERS = {
    'User-Agent': f'IYmtg/1.0 ({CONTACT_EMAIL})',
    'Accept': 'application/json',
}
OUTPUT_DIR = "Set_Symbol_Training"

# Layouts where the set symbol is NOT at the standard M15 position.
# Excluding these prevents corrupt/misaligned crops in the training set.
# NOTE: 'planeswalker' is not a Scryfall layout value (planeswalkers are
# reported with layout "normal"); it is kept here for backward compatibility
# and planeswalkers are additionally filtered by type line below.
EXCLUDED_LAYOUTS = {'transform', 'modal_dfc', 'reversible_card', 'planeswalker',
                    'saga', 'battle', 'split', 'flip'}

# Pause inserted before follow-up requests to respect Scryfall rate limits.
REQUEST_DELAY_SECONDS = 0.05

# Fractions of (width, height) bounding the set symbol: left, top, right, bottom.
# WARNING: This crop area is tuned for standard M15+ single-faced cards.
SYMBOL_BOX_FRACTIONS = (0.85, 0.58, 0.95, 0.65)


def main():
    """Fetch the set list from Scryfall and harvest symbol crops for each set.

    Sets are processed by a small thread pool. A failure in one set is
    printed and does not stop the remaining sets.
    """
    if CONTACT_EMAIL == "support@iymtg.com":
        print("⚠️ WARNING: Using default contact email in User-Agent. "
              "Update CONTACT_EMAIL at the top of this script.")

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    print("--- IYmtg Symbol Harvester ---")

    # The context manager closes pooled connections when harvesting is done.
    with requests.Session() as session:
        session.headers.update(HEADERS)

        try:
            response = session.get("https://api.scryfall.com/sets", timeout=15)
            response.raise_for_status()
            all_sets = response.json().get('data', [])
        except Exception as e:
            print(f"Error fetching sets: {e}")
            return

        valid_types = ['core', 'expansion', 'masters', 'draft_innovation']
        target_sets = [s for s in all_sets if s.get('set_type') in valid_types]

        print(f"Found {len(target_sets)} valid sets.")

        # OPTIMIZATION: Process sets in parallel to speed up dataset creation.
        # NOTE(review): the session is shared by all workers, as in the
        # original script; confirm this is acceptable for requests.Session.
        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
            futures = {
                executor.submit(process_set, set_obj, session): set_obj.get('code', '?')
                for set_obj in target_sets
            }
            # Collect every result so an exception raised inside a worker
            # (e.g. a set directory that cannot be created) is reported
            # instead of being silently dropped.
            for future in concurrent.futures.as_completed(futures):
                try:
                    future.result()
                except Exception as e:
                    print(f"Error processing {futures[future]}: {e}")

    print("DONE. Drag 'Set_Symbol_Training' into Create ML -> Image Classification.")


def _search_cards(set_code, session):
    """Return the card objects from the first search page for *set_code*.

    Normal-layout cards are requested first; if that search returns HTTP 404
    the query is retried without the layout filter. Returns ``None`` (after
    printing the status) when the final response is not HTTP 200.
    """
    # Filter to standard single-faced cards to guarantee reliable symbol crop coordinates.
    # NOTE(review): per_page does not appear among Scryfall's documented search
    # parameters, so the page may hold more than 5 cards - confirm.
    url = f"https://api.scryfall.com/cards/search?q=set:{set_code}+unique:prints+layout:normal&per_page=5"
    resp = session.get(url, timeout=10)

    if resp.status_code == 404:
        # No normal-layout cards found; fall back to any card type
        time.sleep(REQUEST_DELAY_SECONDS)
        url = f"https://api.scryfall.com/cards/search?q=set:{set_code}+unique:prints&per_page=5"
        resp = session.get(url, timeout=10)

    if resp.status_code != 200:
        print(f"Skipping {set_code}: HTTP {resp.status_code}")
        return None

    return resp.json().get('data', [])


def _has_standard_symbol_position(card):
    """Return True when *card* is expected to match the standard crop area."""
    if card.get('layout', '') in EXCLUDED_LAYOUTS:
        return False
    # Planeswalkers are not identified by a layout value, so check the type line.
    return 'Planeswalker' not in (card.get('type_line') or '')


def _pick_image_url(card):
    """Return the 'large' (preferred) or 'normal' image URL of *card*, or None.

    Falls back to the first face's images when the card has no top-level
    ``image_uris``.
    """
    uris = {}
    if 'image_uris' in card:
        uris = card['image_uris']
    else:
        faces = card.get('card_faces') or []
        if faces and 'image_uris' in faces[0]:
            uris = faces[0]['image_uris']
    return uris.get('large') or uris.get('normal')


def _open_verified(image_bytes):
    """Open *image_bytes* as a PIL image after an integrity check.

    Raises whatever Pillow raises when the data is not a valid image.
    """
    Image.open(BytesIO(image_bytes)).verify()  # Verify integrity
    return Image.open(BytesIO(image_bytes))  # Re-open after verify


def _crop_symbol(img):
    """Return the set-symbol region of *img* as a new RGB image."""
    width, height = img.size
    left, top, right, bottom = SYMBOL_BOX_FRACTIONS
    # Explicit rounding, intended to match how Pillow converts float coordinates.
    box = (round(width * left), round(height * top),
           round(width * right), round(height * bottom))
    return img.crop(box).convert("RGB")


def process_set(set_obj, session):
    """Download card images for one set and save their set-symbol crops.

    Args:
        set_obj: Scryfall set object; only its ``'code'`` key is read.
        session: ``requests.Session`` used for the search and image requests.

    Crops are written to ``OUTPUT_DIR/<SET_CODE>/sample_<i>.jpg`` where ``i``
    is the card's index in the search results. Search and download errors are
    printed rather than raised.
    """
    set_code = set_obj['code']
    print(f"Processing {set_code}...")

    set_dir = os.path.join(OUTPUT_DIR, set_code.upper())
    os.makedirs(set_dir, exist_ok=True)

    try:
        cards = _search_cards(set_code, session)
        if cards is None:
            return

        saved = 0
        for i, card in enumerate(cards):
            # Skip cards where the symbol position differs from the standard M15 crop area
            if not _has_standard_symbol_position(card):
                continue

            image_url = _pick_image_url(card)
            if not image_url:
                continue

            try:
                # Brief sleep between image downloads to respect Scryfall rate limits
                time.sleep(REQUEST_DELAY_SECONDS)
                img_resp = session.get(image_url, timeout=10)
                if img_resp.status_code != 200:
                    continue

                try:
                    img = _open_verified(img_resp.content)
                except Exception:
                    print(f"Skipping corrupt image in {set_code}")
                    continue

                # Close the source image once the crop has been extracted.
                with img:
                    symbol = _crop_symbol(img)

                symbol.save(os.path.join(set_dir, f"sample_{i}.jpg"))
                saved += 1
            except Exception as e:
                print(f"Error downloading image for {set_code}: {e}")

        if saved == 0:
            print(f"⚠️ No usable images saved for {set_code}")

    except Exception as e:
        print(f"Error searching cards for {set_code}: {e}")


if __name__ == "__main__":
    main()