"""Scrape item data (type, sell price, crafting recipe) from deeptownguide.com.

NOTE(review): the page layout assumed by the parsing helpers below is inferred
from the labels and table column headers that the previous regex-based
implementation searched for ("Type", "Sell Price", "Building Name", ...).
Confirm against a live item page before relying on the output.
"""

import os
import random
import re

import bs4
import requests
from fs import path
from fs.ftpfs import FTPFS
from fs.osfs import OSFS

_BASE_URL = "https://deeptownguide.com"
# requests.get() waits indefinitely unless a timeout is given.
_REQUEST_TIMEOUT = 30

_FTP_ENV_VARS = ("FTP_ADDRESS", "FTP_USER", "FTP_PASS")

# Recipe table columns, whitespace removed, in the order the original
# patterns expected them to appear.
_RECIPE_COLUMNS = (
    "BuildingName",
    "UnlockedatDepth",
    "CostToUnlock",
    "TimeRequired",
    "AmountCreated",
    "ItemsRequired",
)

_ITEM_PATH_RE = re.compile(r"/Items/Details/[0-9]+/[a-zA-Z0-9-]*")
_NUMBER_RE = re.compile(r"[0-9][0-9,]*")
_DURATION_RE = re.compile(r"([0-9]+)\s*(Second|Minute|Hour)s?")
_INGREDIENT_RE = re.compile(r"([A-Za-z]+)([0-9]+)")
_SECONDS_PER_UNIT = {"Second": 1, "Minute": 60, "Hour": 60 * 60}

# Use the FTP backend only when all three credentials are set and non-empty;
# otherwise fall back to the current working directory.
fileSystem = None
if all(os.environ.get(name, False) for name in _FTP_ENV_VARS):
    print("FTP")
    fileSystem = FTPFS(
        os.environ["FTP_ADDRESS"],
        user=os.environ["FTP_USER"],
        passwd=os.environ["FTP_PASS"],
        timeout=600,
    )
else:
    print("OS")
    fileSystem = OSFS(os.getcwd())


def format_string(text):
    """Return *text* lower-cased with spaces, hyphens and underscores removed."""
    return text.replace(" ", "").replace("-", "").replace("_", "").lower()


def _squash(text):
    """Return *text* with every whitespace character removed."""
    return "".join(text.split())


def _parse_int(text):
    """Return the first integer in *text* (commas ignored), or None if absent."""
    match = _NUMBER_RE.search(text)
    if match is None:
        return None
    return int(match.group(0).replace(",", ""))


def _parse_duration(text):
    """Return the total seconds described by text such as "1 Minute 30 Seconds"."""
    return sum(
        int(number) * _SECONDS_PER_UNIT[unit]
        for number, unit in _DURATION_RE.findall(text)
    )


def _value_after_label(strings, label):
    """Return the string that follows the first one ending in *label*.

    *label* is compared with whitespace removed, so "SellPrice" matches a
    "Sell Price" text node.  Returns None when no such label has a successor.
    """
    for current, following in zip(strings, strings[1:]):
        if _squash(current).endswith(label):
            return following
    return None


def _find_recipe_row(soup):
    """Return the first recipe row as a {column: cell text} dict, or None.

    A recipe table is any <table> whose <th> cells include every name in
    _RECIPE_COLUMNS; its first row with at least that many <td> cells is used.
    NOTE(review): assumes the data cells appear in _RECIPE_COLUMNS order.
    """
    for table in soup.find_all("table"):
        headers = {_squash(th.get_text()) for th in table.find_all("th")}
        if not headers.issuperset(_RECIPE_COLUMNS):
            continue
        for row in table.find_all("tr"):
            cells = [td.get_text(" ", strip=True) for td in row.find_all("td")]
            if len(cells) >= len(_RECIPE_COLUMNS):
                return dict(zip(_RECIPE_COLUMNS, cells))
    return None


def get_all_item_urls():
    """Return the absolute URL of every item linked from the item index page.

    URLs are returned in first-seen order without duplicates.  An empty list
    is returned when the index page does not answer with HTTP 200.
    """
    page = requests.get(_BASE_URL + "/Items", timeout=_REQUEST_TIMEOUT)
    item_urls = []
    if page.status_code != 200:
        return item_urls

    seen = set()  # O(1) duplicate check instead of scanning the list each time
    for match in _ITEM_PATH_RE.finditer(page.text):
        item_url = _BASE_URL + match.group(0)
        if item_url not in seen:
            seen.add(item_url)
            item_urls.append(item_url)
    return item_urls


def get_item_info(url):
    """Fetch an item page and return what could be extracted from it.

    Returns a dict with the keys:
      "type"     - normalised item type, or None if not found
      "building" - normalised name of the crafting building, or None
      "value"    - sell price as int, or None if not found
      "quantity" - amount created by the recipe (0 when there is no recipe)
      "needed"   - {normalised ingredient name: int quantity}

    Fields that cannot be located keep their default instead of raising.
    """
    result = {"type": None, "building": None, "value": None, "quantity": 0, "needed": {}}
    page = requests.get(url, timeout=_REQUEST_TIMEOUT)
    if page.status_code != 200:
        return result

    soup = bs4.BeautifulSoup(page.content, "html.parser")
    strings = list(soup.stripped_strings)

    # NOTE(review): assumes each label and its value are separate text nodes.
    item_type = _value_after_label(strings, "Type")
    if item_type is not None:
        result["type"] = format_string(item_type)

    price = _value_after_label(strings, "SellPrice")
    if price is not None:
        result["value"] = _parse_int(price)

    recipe = _find_recipe_row(soup)
    if recipe is None:
        # No recipe table found: keep the recipe defaults.
        return result

    result["building"] = format_string(recipe["BuildingName"])

    # The crafting time was only ever printed, never stored; kept that way.
    print(_parse_duration(recipe["TimeRequired"]))

    amount = _parse_int(recipe["AmountCreated"])
    if amount is not None:
        result["quantity"] = amount

    # Ingredients are read as runs of letters immediately followed by digits
    # once whitespace is removed, e.g. "CopperOre5".
    for name, quantity in _INGREDIENT_RE.findall(_squash(recipe["ItemsRequired"])):
        result["needed"][format_string(name)] = int(quantity)

    return result


def get_sector_info():
    """Fetch the item index page; sector parsing is not implemented.

    TODO(review): this function ends after building its regex and implicitly
    returns None; decide what it should extract or remove it.
    """
    page = requests.get(_BASE_URL + "/Items", timeout=_REQUEST_TIMEOUT)
    num_regex = re.compile(r"[0-9]+")


def update_data(file_system):
    """Scrape every item and return {normalised item name: item info}.

    *file_system* is accepted for interface compatibility but is not used:
    nothing is written to it yet.  TODO(review): persist the result.
    """
    items = {}
    for item_url in get_all_item_urls():
        # The item name is the last path segment of the details URL.
        item_name = item_url.rsplit("/", 1)[-1]
        items[format_string(item_name)] = get_item_info(item_url)
    return items


if __name__ == "__main__":
    update_data(fileSystem)