#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Mon Nov 5 20:55:36 2018 @author: suwako """ import requests import re def runfetch(url): session=requests.Session() req1=session.get(url) # Get the source code text=req1.text.split('\n') # Parse the HTML source code text=['\n'.join(row.split('\n')[:row.split('\n').index(""):]) for row in '\n'.join(text[text.index('
ANNEXES
ANNEXE I

'):text.index('
Annexe
'):]).split('')[2::]] diclist=[] for row in text: cols=[text.split('\n')[2][5::] if len(text.split('\n'))==3 else "" for text in row.split("", "\n", diclist[index][element]) # Replace
,
and
by \n # Make a the area for area in diclist: # Polygons if "polygone" in area["zone"]: lst = re.findall(".{0,35} ?:? ?.? ?: ?(\d{3}° \d{2}' \d{2},?\d{0,2}” .) ?\/ ?(\d{2}° \d{2}' \d{2},?\d{0,2}” .)", area["zone"]) area["zone"] = lst print(lst) del lst # Print the dict (keep this code in the end of the file) print("===== Dict =====") for index, area in enumerate(diclist): print(f"\n-------------{index}----------------\n") for element in area: print(f" {element}: {diclist[index][element]}")