\w* | [0-9]* | ([0-9]|,)* | ([0-9]+|Seconds?|Minutes?|"
r"Hours?)+ | [0-9]+")
needed_regex = re.compile(r" | (\w+( )?)+")
type_iter = type_regex.finditer(str(texte))
value_iter = value_regex.finditer(str(texte))
building_iter = building_regex.finditer(str(texte))
time_iter = time_regex.finditer(str(texte))
quantity_iter = quantity_regex.finditer(str(texte))
needed_iter = needed_regex.finditer(str(texte))
# Extract value from regex result
result["type"] = format_string(re.sub(r"Type ", "", str(type_iter.__next__().group(0))))
result["value"] = int(
re.sub(r"SellPrice ", "", str(value_iter.__next__().group(0))).replace(
",", ""))
# Extract for recipe
try:
result["building"] = format_string(re.sub(
r"\w*iscreatedfromthisrecipe"
r"BuildingName | UnlockedatDepth | Cost"
r"ToUnlock | TimeRequired | AmountCreated | ItemsRequired | "
r"thead>\w*iscreatedfromthisrecipeBuildingName | UnlockedatDepth | CostToUnlock | TimeRequired | AmountCreated | ItemsRequired |
---|
\w* | [0-"
r"9]* | ([0-9]|,)* | ",
"",
str(time_iter.__next__().group(0))))
# Time:
time_str = time_str.replace("s", "") # remove plural
time_list = re.split("([0-9]+)", time_str)
if time_list[0] == '':
del time_list[0]
time = 0
for number, unit in zip(time_list[::2], time_list[1::2]):
if unit == "Second":
time += int(number)
elif unit == "Minute":
time += int(number) * 60
elif unit == "Hour":
time += int(number) * 60 * 60
print(time)
result["quantity"] = int(str(re.sub("\w*iscrea"
"tedfromthisrecipeBuild"
"ingName | UnlockedatDepth | CostToUnlock | TimeRequired<"
"/th> | AmountCreated | ItemsRequired |
---|
\w*([0-9]|,)* | ([0-9]|,)* | ([0-9]+|Seconds?"
"|Minutes?|Hours?)+ | ",
"",
quantity_iter.__next__().group(0))))
needed_text = re.sub(r" | ", "", needed_iter.__next__().group(0))
item_name_iter = re.finditer(r"[A-Za-z]+[0-9]+", str(needed_text))
for item_name_match, item_quantity_match in zip(item_name_iter, item_quantity_iter):
item_name = re.sub(r"[A-Za-z]+", "", item_quantity_match.group(0)))
result["needed"].update({format_string(item_name): item_quantity})
except StopIteration:
pass
return result
def get_sector_info():
    """Request the deeptownguide.com items page and compile a digit regex.

    NOTE(review): in the visible body both results are bound to locals that
    are never read, and the function returns nothing -- this looks like an
    unfinished stub; confirm the intended behavior before relying on it.
    """
    # Issues an HTTP GET for the items listing URL on every call.
    page = requests.get("https://deeptownguide.com/Items")
    # Pattern matching one or more consecutive digits 0-9.
    num_regex = re.compile(r"[0-9]+")
def update_data(file_system=None):
    """Fetch the details of every item and collect them keyed by item name.

    For each URL returned by ``get_all_item_urls()`` the detail-page prefix
    (``https://deeptownguide.com/Items/Details/<id>/``) is stripped, the
    remainder is passed through ``format_string`` to form the key, and the
    value is whatever ``get_item_info`` returns for that URL.

    Args:
        file_system: Not read by this function. It defaults to ``None`` so
            that the no-argument call in the script entry point no longer
            raises ``TypeError``; existing callers that pass a value are
            unaffected.

    Returns:
        None.

    NOTE(review): the collected ``items`` mapping is neither returned nor
    written anywhere -- presumably persistence via ``file_system`` was
    planned; confirm before depending on this function's output.
    """
    # Compile once; the same prefix pattern is applied to every URL.
    details_prefix = re.compile("https://deeptownguide.com/Items/Details/[0-9]+/")
    items = {}
    for item_url in get_all_item_urls():
        # The key is computed before the item page is fetched, matching the
        # evaluation order of the original dict literal.
        item_key = format_string(details_prefix.sub("", item_url))
        items[item_key] = get_item_info(item_url)
    return None
# Script entry point: run the data update only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    update_data()
| | | |