\w* | [0-9]* | ([0-9]|,)* | ([0-9]+|Seconds?|Minutes?|"
r"Hours?)+ | [0-9]+")
needed_regex = re.compile(r" | ((\w|,)+ )+")
type_iter = type_regex.finditer(str(texte))
value_iter = value_regex.finditer(str(texte))
building_iter = building_regex.finditer(str(texte))
time_iter = time_regex.finditer(str(texte))
quantity_iter = quantity_regex.finditer(str(texte))
needed_iter = needed_regex.finditer(str(texte))
# Extract value from regex result
result["type"] = format_string(re.sub(r"Type ", "", str(type_iter.__next__().group(0))))
result["value"] = int(
re.sub(r"SellPrice ", "", str(value_iter.__next__().group(0))).replace(
",", ""))
# Extract for recipe
try:
result["building"] = format_string(re.sub(
r"\w*iscreatedfromthisrecipe"
r"BuildingName | UnlockedatDepth | Cost"
r"ToUnlock | TimeRequired | AmountCreated | ItemsRequired | "
r"thead>\w*iscreatedfromthisrecipeBuildingName | UnlockedatDepth | CostToUnlock | TimeRequired | AmountCreated | ItemsRequired |
---|
\w* | [0-"
r"9]* | ([0-9]|,)* | ",
"",
str(time_iter.__next__().group(0))))
# Time:
time_str = time_str.replace("s", "") # remove plural
time_list = re.split("([0-9]+)", time_str)
if time_list[0] == '':
del time_list[0]
time = 0
for number, unit in zip(time_list[::2], time_list[1::2]):
if unit == "Second":
time += int(number)
elif unit == "Minute":
time += int(number) * 60
elif unit == "Hour":
time += int(number) * 60 * 60
result['time'] = int(time)
result["quantity"] = int(str(re.sub("\w*iscrea"
"tedfromthisrecipeBuild"
"ingName | UnlockedatDepth | CostToUnlock | TimeRequired<"
"/th> | AmountCreated | ItemsRequired |
---|
\w*([0-9]|,)* | ([0-9]|,)* | ([0-9]+|Seconds?"
"|Minutes?|Hours?)+ | ",
"",
quantity_iter.__next__().group(0))))
needed_text = re.sub(r" | ", "", needed_iter.__next__().group(0))
item_name_iter = re.finditer(r"[A-Za-z]+([0-9]|,)+", str(needed_text))
for item_name_match, item_quantity_match in zip(item_name_iter, item_quantity_iter):
item_name = re.sub(r"[A-Za-z]+", "", item_quantity_match.group(0)).replace(",", "").replace(
".", ""))
result["needed"].update({format_string(item_name): item_quantity})
except StopIteration:
pass
return result
# Download the "Areas/Resources" page from deeptownguide.com and build a dict
# with one entry per matched line (keyed by the stringified number parsed from
# that line, holding that line's items) plus a "0" entry that maps every
# collected name to 0. The dict is returned to the caller.
# NOTE(review): the regex literals below look truncated, as if markup had been
# stripped out of them; confirm every pattern against the upstream file before
# relying on this function.
def get_sector_info():
page = requests.get("https://deeptownguide.com/Areas/Resources")
# All spaces and newlines are removed, and r"\n" additionally removes the
# two-character sequence backslash + "n" (presumably because str() of a bytes
# payload yields its repr; verify the type of page.content).
texte = str(page.content).replace(" ", "").replace("\n", "").replace(r"\n", "")
# NOTE(review): every alternative in the repeated group needs a literal space,
# but texte has had all spaces removed above, so as written this cannot match.
line_regex = re.compile(r"[0-9]+(( \w* ([0-9]|\.|%)+| ) | )+")
# First run of digits in a matched line; used below as the entry's key.
num_regex = re.compile(r"[0-9]+")
item_regex = re.compile(r"( \w* ([0-9]|\.|%)+| )"
r" | ")
# NOTE(review): this pattern has an unbalanced "(" (two opened, one closed), so
# re.compile raises re.error here.
item_name_regex = re.compile(r"(([0-9]|\.)+")
line_iter = line_regex.finditer(texte)
# etages: result dict; liste_items: every distinct name seen, in first-seen order.
etages = {}
liste_items = []
for line in line_iter:
etage_iter = num_regex.finditer(line.group(0))
# With an empty pattern and empty replacement this re.sub leaves the text
# unchanged; the first numeric match is converted to int as-is.
etage = int(re.sub(r"", "", etage_iter.__next__().group(0)))
item_iter = item_regex.finditer(line.group(0))
items = {}
for item in item_iter:
name_iter = item_name_regex.finditer(item.group(0))
# NOTE(review): this line appears to be several statements fused together.
# quantity_iter and quantity are never assigned in this function, r"(" is not
# a valid pattern, and dividing the str() result by 100 raises TypeError.
name = str(re.sub(r"(", "", quantity_iter.__next__().group(0))) / 100
items.update({name: quantity})
# Remember each name once so the "0" entry below can list all of them.
if name not in liste_items:
liste_items.append(name)
etages.update({str(etage): items})
# Extra "0" entry: every collected name mapped to 0.
etages.update({"0": {name: 0 for name in liste_items}})
return etages
def update_data():
    """Rebuild ``items.json`` and ``mines.json`` from freshly scraped data.

    Each URL returned by ``get_all_item_urls()`` is passed to
    ``get_item_info()``; the result is stored under the URL's trailing slug
    after it has been run through ``format_string``. The number of URLs is
    printed first, then a completion percentage after every item.

    The sector data is collected *before* ``mines.json`` is opened for
    writing, so a failure inside ``get_sector_info()`` no longer leaves an
    empty, truncated ``mines.json`` behind.

    Returns:
        None
    """
    urls_item = get_all_item_urls()
    total = len(urls_item)
    print(total)

    items = {}
    for done, item_url in enumerate(urls_item, start=1):
        # Strip the fixed URL prefix and numeric id; what remains is the slug.
        slug = re.sub("https://deeptownguide.com/Items/Details/[0-9]+/", "", item_url)
        key = str(format_string(slug))
        items[key] = get_item_info(item_url)
        print(done * 100 / total, "%")

    with open('items.json', "w") as dest_file:
        json.dump(items, dest_file)

    # Scrape first, open afterwards: mode "w" truncates the file immediately.
    sectors = get_sector_info()
    with open('mines.json', "w") as dest_file:
        json.dump(sectors, dest_file)
if __name__ == "__main__":
    # Show the parsed record for one item page, then regenerate the JSON
    # data files.
    sample_item = get_item_info('https://deeptownguide.com/Items/Details/702/stage-ii')
    print(sample_item)
    update_data()
| | | |