1
0
Fork 0
This repository has been archived on 2023-07-22. You can view files and clone it, but cannot push or open issues or pull requests.
dailyreleases/dailyreleases/parsing.py

229 lines
6.2 KiB
Python

import logging
import re
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from enum import Enum
from typing import List, Dict, Iterable
from . import stores
from .predbs import Pre
from .stores import steam
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class ReleaseType(str, Enum):
    """Kind of content a release delivers: a full game, an update or a DLC.

    Members mix in ``str``, so they compare equal to their plain string value,
    and ``str(member)`` returns that value (e.g. ``"Update"``).
    """

    GAME = "Game"
    UPDATE = "Update"
    DLC = "DLC"

    def __str__(self) -> str:
        # Render as the plain value instead of the default Enum representation.
        label: str = self.value
        return label
class Platform(str, Enum):
    """Operating system a release targets.

    Members mix in ``str``, so they compare equal to their plain string value,
    and ``str(member)`` returns that value (e.g. ``"Mac OSX"``).
    """

    WINDOWS = "Windows"
    OSX = "Mac OSX"
    LINUX = "Linux"

    def __str__(self) -> str:
        # Render as the plain value instead of the default Enum representation.
        label: str = self.value
        return label
@dataclass
class Release:
    """Everything the parser knows about a single release.

    The first eight fields are required; the remaining ones default to empty
    containers or to -1 and may be filled in after construction.
    """

    # Full release directory name, including the trailing "-GROUP" part.
    dirname: str
    nfo_link: str
    timestamp: datetime
    # dirname without group
    rls_name: str
    group: str
    # Human-readable game title derived from rls_name.
    game_name: str
    type: ReleaseType
    platform: Platform

    # Store links keyed by name; parse_pre checks for a "Steam" key.
    store_links: Dict[str, str] = field(default_factory=dict)
    # Matched stopwords that tell otherwise identical releases apart (e.g. x86/x64).
    tags: List[str] = field(default_factory=list)
    # Matched stopwords that deserve to be highlighted (e.g. PROPER).
    highlights: List[str] = field(default_factory=list)

    # Score and number of reviews are -1 by default; they are updated if the game exists on Steam.
    score: int = -1
    num_reviews: int = -1
class ParseError(Exception):
    """Raised when a pre is rejected by the parser and should be skipped.

    The exception message states the reason (e.g. "Older than 48 hours").
    """
# Regex fragments marking the end of the game name inside a release name.
# parse_pre joins STOPWORDS + TAGS + HIGHLIGHTS into one case-insensitive
# alternation and takes everything before the first match as the game name.
# Order matters: earlier alternatives are tried first.
STOPWORDS = (
    "update",
    "v[0-9]+",
    "build[._-]?[0-9]+",
    "iNTERNAL",
    "incl",
    "Standalone",
    "Multilanguage",
    "DLC",
    "DLC[._-]?Unlocker",
    "Steam[._-]?Edition",
    "GOG",
    "mac[._-]?os[._-]?x?",
    "linux",
)
# Stopwords that distinguish two otherwise identical releases (e.g. x86/x64);
# parse_pre collects the ones that matched into Release.tags.
TAGS = (
    "Hotfix",
    "Crack[._-]?fix",
    "Dir[._-]?fix",
    "MULTI[._-]?[0-9]+",
    "x(?:86|64)",
    "(?:86|64)[._-]?bit",
    "RIP",
    "REPACK",
    "German",
    "Czech",
    "Russian",
    "Korean",
    "Italian",
    "Swedish",
    "Danish",
    "French",
    "Slovak",
)
# Stopwords that signify an important piece of information (e.g. PROPER);
# parse_pre collects the ones that matched into Release.highlights.
HIGHLIGHTS = (
    "PROPER",
    "READNFO",
)
# Regex fragments identifying releases that should be rejected outright.
# parse_pre joins them into one alternation and raises ParseError when it
# matches anywhere in the dirname (case-insensitively).
#
# Every entry MUST be followed by a comma: adjacent string literals are
# silently concatenated by Python, which would merge two entries into a single
# pattern that matches neither word on its own.
BLACKLISTED = (
    "Keygen",
    "Keymaker",
    "[._-]3DS",
    "[._-]NSW",
    "[._-]PS4",
    "[._-]PSP",
    "[._-]Wii",
    "[._-]WiiU",
    "x264",
    "720p",
    "1080p",
    "eBook",
    "TUTORIAL",
    "Debian",
    "Ubuntu",
    "Fedora",
    "openSUSE",
    "jQuery",
    "CSS",
    "ASP[._-]NET",
    "Windows[._-]Server",
    "Lynda",
    "OREILLY",
    "Wintellectnow",
    "3ds[._-]?Max",
    "For[._-]Maya",
    "Cinema4D",
)
def parse_pre(pre: Pre, offline: bool = False) -> Release:
    """Parse a single pre into a Release.

    The group, game name, tags, highlights, platform and release type are all
    derived from ``pre.dirname`` alone. Unless ``offline`` is set, store links
    are then looked up via ``stores.find_store_links`` and, when a Steam link
    is found, ``steam.update_info`` is called on the release.

    Args:
        pre: The pre to parse; its ``dirname``, ``nfo_link`` and ``timestamp``
            attributes are read.
        offline: If true, return right after the name-based parsing without
            contacting any store.

    Returns:
        The parsed release.

    Raises:
        ParseError: If the dirname contains a blacklisted word, the pre is
            older than 48 hours, the dirname has no "-GROUP" suffix, or (when
            not offline) no store link could be found.
    """
    if re.search("|".join(BLACKLISTED), pre.dirname, flags=re.IGNORECASE):
        raise ParseError("Contains blacklisted word")

    # NOTE(review): this compares against a naive UTC "now", so pre.timestamp
    # is presumably a naive UTC datetime as well - confirm against predbs.
    if pre.timestamp < datetime.utcnow() - timedelta(hours=48):
        raise ParseError("Older than 48 hours")

    logger.info("---")
    logger.info("Parsing: %s", pre.dirname)

    # Extract group name: everything after the last dash. A dirname without
    # any dash is rejected with ParseError (rather than failing with a
    # ValueError on unpacking) so that batch parsing can simply skip it.
    rls_name, separator, group = pre.dirname.rpartition("-")
    if not separator:
        raise ParseError("No group name")

    # Find game name by matching until one of the stopwords. Because the
    # pattern has a capture group, re.split also returns the matched stopwords
    # (interleaved with the text between them) after the game name.
    game_name, *stopwords = re.split(
        "[._-]({})".format("|".join(STOPWORDS + TAGS + HIGHLIGHTS)),
        rls_name,
        flags=re.IGNORECASE,
    )

    # Prettify game name by substituting word delimiters with spaces
    game_name = re.sub("[_-]", " ", game_name)
    # A dot is only replaced when it borders a run of at least two word
    # characters, which keeps titles like "R.O.V.E.R." intact.
    game_name = re.sub(r"[.](\w{2,})", r" \g<1>", game_name)
    game_name = re.sub(r"(\w{2,})[.]", r"\g<1> ", game_name)

    # Some stopwords distinguish two otherwise identical releases (e.g. x86/x64) - we call these tags
    tags_pattern = "|".join(TAGS)
    tags = [stopword
            for stopword in stopwords
            if re.match(tags_pattern, stopword, flags=re.IGNORECASE)]

    # Some stopwords signify an important piece of information and deserve to be highlighted (e.g. PROPER)
    highlights_pattern = "|".join(HIGHLIGHTS)
    highlights = [stopword
                  for stopword in stopwords
                  if re.match(highlights_pattern, stopword, flags=re.IGNORECASE)]

    # Find platform; anything not marked as Mac or Linux is treated as Windows
    if re.search("mac[._-]?os[._-]?x?", rls_name, flags=re.IGNORECASE):
        platform = Platform.OSX
    elif re.search("linux", rls_name, flags=re.IGNORECASE):
        platform = Platform.LINUX
    else:
        platform = Platform.WINDOWS

    # Find release type (Game/DLC/Update)
    # Order of the if-statements is important: Update trumps DLC because an update to a DLC is an update, not a DLC!
    if re.search("update|v[0-9]|addon|Crack[._-]?fix|DIR[._-]?FIX|build[._-]?[0-9]+", rls_name, flags=re.IGNORECASE):
        rls_type = ReleaseType.UPDATE
    elif re.search("(?<!incl[._-])dlc", rls_name, flags=re.IGNORECASE):  # 'Incl.DLC' isn't a DLC-release
        rls_type = ReleaseType.DLC
    else:
        rls_type = ReleaseType.GAME

    logger.info("Offline: %s %s : %s - %s", platform, rls_type, game_name, group)
    logger.info("Tags: %s. Highlights: %s", tags, highlights)

    release = Release(
        dirname=pre.dirname,
        nfo_link=pre.nfo_link,
        timestamp=pre.timestamp,
        rls_name=rls_name,
        group=group,
        game_name=game_name,
        type=rls_type,
        platform=platform,
        tags=tags,
        highlights=highlights,
    )

    if offline:
        return release

    # Find store links
    release.store_links = stores.find_store_links(game_name)

    # No store link? Probably software and not a game
    if not release.store_links:
        raise ParseError("No store link: probably software")

    # If one of the store links we found is to Steam, use their API to get (better) information about the game.
    if "Steam" in release.store_links:
        try:
            steam.update_info(release)
        except Exception as e:  # a lot of stuff can go wrong with Steam's API, better catch everything
            # Deliberately best-effort: the release is still returned with
            # whatever information was gathered before the failure.
            logger.error("Failed to update release info using Steam's API on %s", release)
            logger.exception(e)

    logger.info("Final : %s %s : %s - %s : %s", release.platform, release.type, release.game_name, release.group,
                release)
    return release
Releases = Dict[Platform, Dict[ReleaseType, List[Release]]]  # {Windows: {Game: [..], DLC: [..], ..}, Linux: ...}


def parse_pres(pres: Iterable[Pre]) -> Releases:
    """Parse many pres and group the resulting releases by platform and type.

    Every platform/release-type combination is present in the result, even
    when its list is empty. Pres for which ``parse_pre`` raises ``ParseError``
    are logged and skipped; any other exception propagates to the caller.

    Args:
        pres: The pres to parse.

    Returns:
        A nested mapping ``{platform: {release_type: [release, ...]}}``.
    """
    # Pre-populate every bucket so callers never have to deal with missing keys.
    releases: Releases = {}
    for platform in Platform:
        releases[platform] = {release_type: [] for release_type in ReleaseType}

    for pre in pres:
        try:
            release = parse_pre(pre)
        except ParseError as e:
            logger.info("Skipping %s: %s", pre.dirname, e)
            continue
        releases[release.platform][release.type].append(release)

    logger.debug("Parsed releases: %s", releases)
    return releases