1
0
Fork 0
This repository has been archived on 2023-07-22. You can view files and clone it, but cannot push or open issues or pull requests.
dailyreleases/dailyreleases/parsing.py

227 lines
6.2 KiB
Python

import logging
import re
import string
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from enum import Enum
from typing import List, Dict, Iterable
from . import stores
from .predbs import Pre
from .stores import steam
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class ReleaseType(str, Enum):
    """Kind of release; each member's value is its human-readable label."""

    GAME = "Game"
    UPDATE = "Update"
    DLC = "DLC"

    def __str__(self) -> str:
        # Render as the plain label (e.g. "Game") instead of "ReleaseType.GAME".
        label: str = self.value
        return label
class Platform(str, Enum):
    """Platform a release targets; each member's value is its human-readable label."""

    WINDOWS = "Windows"
    OSX = "Mac OSX"
    LINUX = "Linux"

    def __str__(self) -> str:
        # Render as the plain label (e.g. "Mac OSX") instead of "Platform.OSX".
        label: str = self.value
        return label
@dataclass
class Release:
    """A parsed release: the raw pre fields plus everything derived from the dirname."""

    # --- Fields copied from / derived directly from the pre ---
    dirname: str
    nfo_link: str
    timestamp: datetime
    # dirname without group
    rls_name: str
    group: str
    game_name: str
    type: ReleaseType
    platform: Platform

    # --- Optional metadata; every container gets its own fresh instance ---
    store_links: Dict[str, str] = field(default_factory=dict)
    tags: List[str] = field(default_factory=list)
    highlights: List[str] = field(default_factory=list)

    # Score and number of reviews are -1 by default; they are updated if the game exists on Steam.
    score: int = -1
    num_reviews: int = -1
class ParseError(Exception):
    """Raised by parse_pre when a pre is rejected; parse_pres catches it and skips the pre."""
# Regex fragments that mark the end of the game name inside a release name.
# parse_pre joins them (together with TAGS and HIGHLIGHTS) into one case-insensitive
# alternation that must be preceded by a '.', '_' or '-' delimiter.
STOPWORDS = (
    # Version / update markers
    "update",
    "v[0-9]+",
    "build[._-]?[0-9]+",
    # Release qualifiers
    "iNTERNAL",
    "incl",
    "Standalone",
    "Multilanguage",
    "DLC",
    "DLC[._-]?Unlocker",
    "Steam[._-]?Edition",
    "GOG",
    # Platform markers
    "mac[._-]?os[._-]?x?",
    "linux",
)
# Regex fragments for stopwords that distinguish two otherwise identical releases
# (e.g. x86/x64). parse_pre collects the matching stopwords as the release's tags.
TAGS = (
    # Fixes
    "Hotfix",
    "Crack[._-]?fix",
    "Dir[._-]?fix",
    # Language count and architecture
    "MULTI[._-]?[0-9]+",
    "x(?:86|64)",
    "(?:86|64)[._-]?bit",
    # Packaging
    "RIP",
    "REPACK",
    # Languages
    "German",
    "Czech",
    "Russian",
    "Korean",
    "Italian",
    "Swedish",
    "Danish",
    "French",
    "Slovak",
)
# Regex fragments for stopwords that carry an important piece of information about a
# release (e.g. PROPER). parse_pre collects the matching stopwords as highlights.
HIGHLIGHTS = (
    "PROPER",
    "READNFO",
)
# Regex fragments that disqualify a pre outright. parse_pre joins them into a single
# case-insensitive alternation and searches the whole dirname for it.
#
# Every entry MUST be followed by a comma: two adjacent string literals are silently
# concatenated by Python (e.g. "CSS" "ASP[._-]NET" becomes "CSSASP[._-]NET"), which
# turns two working patterns into one pattern that never matches.
BLACKLISTED = (
    # Key generators
    "Keygen",
    "Keymaker",
    # Console releases
    "[._-]3DS",
    "[._-]NSW",
    "[._-]PS4",
    "[._-]PSP",
    "[._-]Wii",
    "[._-]WiiU",
    # Video
    "x264",
    "720p",
    "1080p",
    # Books and tutorials
    "eBook",
    "TUTORIAL",
    # Linux distributions
    "Debian",
    "Ubuntu",
    "Fedora",
    "openSUSE",
    # Development / server software
    "jQuery",
    "CSS",
    "ASP[._-]NET",
    "Windows[._-]Server",
    # Training material publishers
    "Lynda",
    "OREILLY",
    "Wintellectnow",
    # 3D software
    "3ds[._-]?Max",
    "For[._-]Maya",
    "Cinema4D",
)
def parse_pre(pre: Pre) -> Release:
    """Parse a single pre into a Release.

    The dirname is split into release name and group, the game name is taken to be everything
    before the first stopword/tag/highlight, and the platform and release type are derived
    from keywords in the release name. Store links are then looked up for the game name and,
    if one of them is a Steam link, Steam's API is used to update the release.

    :param pre: the pre to parse; its dirname, nfo_link and timestamp are read.
    :return: the parsed Release.
    :raises ParseError: if the dirname contains a blacklisted word, the pre is older than
        48 hours, the dirname has no "-" separating the group name, or no store link is found.
    """
    if re.search("|".join(BLACKLISTED), pre.dirname, flags=re.IGNORECASE):
        raise ParseError("Contains blacklisted word")

    # NOTE(review): datetime.now() is naive local time; this assumes pre.timestamp is a naive
    # datetime in the same timezone -- confirm against the code that builds Pre.
    if pre.timestamp < datetime.now() - timedelta(hours=48):
        raise ParseError("Older than 48 hours")

    logger.info("---")
    logger.info("Parsing: %s", pre.dirname)

    # Extract group name: everything after the last "-". A dirname without any "-" cannot be
    # split; reject it with ParseError so that callers skipping on ParseError keep working
    # instead of crashing on an unpacking ValueError.
    rls_name, separator, group = pre.dirname.rpartition("-")
    if not separator:
        raise ParseError("No group name")

    # Find game name by matching until one of the stopwords. Because the pattern has a capture
    # group, re.split also returns the matched stopwords (interleaved with the text between
    # them); these are inspected for tags and highlights below.
    game_name, *stopwords = re.split(
        "[._-]({})".format("|".join(STOPWORDS + TAGS + HIGHLIGHTS)),
        rls_name,
        flags=re.IGNORECASE,
    )

    # Prettify game name by substituting word delimiters with spaces and capitalizing each word.
    game_name = string.capwords(re.sub("[_-]", " ", game_name))
    # Dots separated by fewer than two letters are not substituted to allow titles like "R.O.V.E.R."
    # Raw strings are required here: "\g" is not a valid string escape sequence.
    game_name = string.capwords(re.sub(r"[.]([a-zA-Z]{2,}|[0-9]+)", r" \g<1>", game_name))

    # Some stopwords distinguish two otherwise identical releases (e.g. x86/x64) - we call these tags
    tags_pattern = re.compile("|".join(TAGS), flags=re.IGNORECASE)
    tags = [stopword for stopword in stopwords if tags_pattern.match(stopword)]

    # Some stopwords signify an important piece of information and deserve to be highlighted (e.g. PROPER)
    highlights_pattern = re.compile("|".join(HIGHLIGHTS), flags=re.IGNORECASE)
    highlights = [stopword for stopword in stopwords if highlights_pattern.match(stopword)]

    # Find platform
    if re.search("mac[._-]?os[._-]?x?", rls_name, flags=re.IGNORECASE):
        platform = Platform.OSX
    elif re.search("linux", rls_name, flags=re.IGNORECASE):
        platform = Platform.LINUX
    else:
        platform = Platform.WINDOWS

    # Find release type (Game/DLC/Update)
    # Order of the if-statements is important: Update trumps DLC because an update to a DLC is an update, not a DLC!
    if re.search("update|v[0-9]|addon|Crack[._-]?fix|DIR[._-]?FIX|build[._-]?[0-9]+", rls_name, flags=re.IGNORECASE):
        rls_type = ReleaseType.UPDATE
    elif re.search("(?<!incl[._-])dlc", rls_name, flags=re.IGNORECASE):  # 'Incl.DLC' isn't a DLC-release
        rls_type = ReleaseType.DLC
    else:
        rls_type = ReleaseType.GAME

    logger.info("Offline: %s %s : %s - %s", platform, rls_type, game_name, group)
    logger.info("Tags: %s. Highlights: %s", tags, highlights)

    # Find store links
    store_links = stores.find_store_links(game_name)

    # No store link? Probably software and not a game
    if not store_links:
        raise ParseError("No store link: probably software")

    release = Release(
        dirname=pre.dirname,
        nfo_link=pre.nfo_link,
        timestamp=pre.timestamp,
        rls_name=rls_name,
        group=group,
        game_name=game_name,
        type=rls_type,
        platform=platform,
        store_links=store_links,
        tags=tags,
        highlights=highlights,
    )

    # If one of the store links we found is to Steam, use their API to get (better) information about the game.
    if "Steam" in store_links:
        try:
            steam.update_info(release)
        except Exception as e:  # a lot of stuff can go wrong with Steam's API, better catch everything
            # Deliberately best-effort: the release is still returned without Steam's information.
            logger.error("Failed to update release info using Steam's API on %s", release)
            logger.exception(e)

    logger.info("Final : %s %s : %s - %s : %s", release.platform, release.type, release.game_name, release.group,
                release)
    return release
# Type alias for parsed releases grouped first by platform and then by release type.
Releases = Dict[Platform, Dict[ReleaseType, List[Release]]] # {Windows: {Game: [..], DLC: [..], ..}, Linux: ...}
def parse_pres(pres: Iterable[Pre]) -> Releases:
    """Parse every pre and group the resulting releases by platform and release type.

    Pres for which parse_pre raises ParseError are logged and skipped.

    :param pres: the pres to parse.
    :return: a dict with one entry per Platform, each holding one list per ReleaseType;
        lists stay empty when nothing was parsed for that combination.
    """
    # Pre-populate every platform/type combination with its own empty list.
    releases: Releases = {}
    for platform in Platform:
        releases[platform] = {release_type: [] for release_type in ReleaseType}

    for pre in pres:
        try:
            parsed = parse_pre(pre)
        except ParseError as error:
            logger.info("Skipping %s: %s", pre.dirname, error)
            continue
        releases[parsed.platform][parsed.type].append(parsed)

    logger.debug("Parsed releases: %s", releases)
    return releases