defi-rendu-legifrance/fetch.py
2018-11-12 00:29:17 +01:00

109 lines
4.9 KiB
Python
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 5 20:55:36 2018
@author: suwako & thedevkiller
"""
import requests
import re
# Variables
# Base URL of the OpenStreetMap API. This currently points at the
# development/testing server; it should be replaced by
# https://api.openstreetmap.org/ when the program is finished.
api = "https://master.apis.dev.openstreetmap.org/"
# Exact source lines that delimit the "ANNEXE I" table in the page.
_ANNEX_START_LINE = '<br/>ANNEXES<br/>ANNEXE I</p>'
_ANNEX_END_LINE = (
    '<div style="margin-top: 30px; margin-bottom:20px;" '
    'id="JORFSCTA000037493059" class="titreSection">Annexe </div>'
)
# Keys of the returned dictionaries, in table-column order.
_ZONE_FIELDS = ("id", "commune", "site", "departement", "zone", "ministere", "aerozone")


def _parse_annex_rows(html):
    """Turn the "ANNEXE I" table of the page source into a list of dicts."""
    lines = html.split('\n')
    try:
        start = lines.index(_ANNEX_START_LINE)
        end = lines.index(_ANNEX_END_LINE)
    except ValueError:
        raise ValueError("could not locate the 'ANNEXE I' table in the page source") from None
    annex = '\n'.join(lines[start:end])

    records = []
    # The first two '<tr>'-separated chunks are not data rows and are skipped.
    for chunk in annex.split('<tr>')[2:]:
        row_lines = chunk.split('\n')
        # Keep only what precedes the line that closes the row.
        row = '\n'.join(row_lines[:row_lines.index("</tr>")])
        cols = []
        for cell in row.split("</td"):
            cell_lines = cell.split('\n')
            # A well-formed cell spans exactly three lines; its content is the
            # third one minus a 5-character prefix (presumably "<br/>").
            cols.append(cell_lines[2][5:] if len(cell_lines) == 3 else "")
        # Index explicitly so that a row with too few columns still raises
        # IndexError instead of being silently truncated.
        records.append({field: cols[position] for position, field in enumerate(_ZONE_FIELDS)})
    return records


def runfetch(url='https://www.legifrance.gouv.fr/eli/arrete/2018/10/12/PRMD1824595A/jo/texte', timeout=30):
    """Download a Legifrance decree and return the rows of its "ANNEXE I" table.

    Args:
        url: Address of the page to download.
        timeout: Number of seconds to wait for the server before giving up.

    Returns:
        A list of dicts, one per table row, with the string-valued keys
        ``id``, ``commune``, ``site``, ``departement``, ``zone``,
        ``ministere`` and ``aerozone``.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the annex table (or a row terminator) cannot be found.
        IndexError: If a table row has fewer than seven columns.
    """
    # The context manager releases the session's connections once we are done.
    with requests.Session() as session:
        response = session.get(url, timeout=timeout)  # Get the source code
        response.raise_for_status()
        html = response.text
    return _parse_annex_rows(html)
def dms2dd(dms):
    """Convert a degrees/minutes/seconds coordinate to signed decimal degrees.

    The expected shape is ``<deg>° <min>' <sec>" <hemisphere>``, for example
    ``48° 51' 24" n``. The minutes mark is optional, the seconds may carry a
    decimal part separated by ``,`` or ``.``, and the seconds mark may be
    ``"``, ``”`` or ``''``.

    Args:
        dms: Text containing the coordinate.

    Returns:
        The coordinate as a float: positive for ``n``/``e``, negative for
        ``s``/``o``/``w`` (case-insensitive). ``None`` when the hemisphere
        character is none of these.

    Raises:
        ValueError: If no coordinate can be found in ``dms``.
    """
    match = re.search(
        r"(\d{1,3})° (\d{2})'? (\d{2}[,.]?\d{0,3})(?:\"|”|'') ?(.)",
        dms,
    )
    if match is None:
        raise ValueError(f"not a degrees/minutes/seconds coordinate: {dms!r}")
    degrees, minutes, seconds, hemisphere = match.groups()
    # Only the seconds can contain a decimal comma.
    dd = float(degrees) + float(minutes) / 60 + float(seconds.replace(",", ".")) / 3600
    hemisphere = hemisphere.lower()
    if hemisphere in ("n", "e"):
        return dd
    # "o" is the French "ouest"; "w" is accepted as its English equivalent.
    if hemisphere in ("s", "o", "w"):
        return -dd
    # Unknown hemisphere marker: keep the historical "no result" behaviour.
    return None
# Matches <br>, <br/> and <br />.
_BR_TAG_RE = re.compile(r"<br.{0,2}>")
# A whole line mentioning "zone" or "csg"; the text between two such lines is
# treated as one polygon of a multi-polygon area.
_POLYGON_HEADER_RE = re.compile(r".*(?:zone|csg).*")
# One "<coordinate> / <coordinate>" point inside a multi-polygon description.
_MULTI_POLYGON_POINT_RE = re.compile(
    r".{0,35} ?:? ?.? ?: ?(\d{1,3}° \d{2}(?:'|) \d{2},?\d{0,3}(?:\"|”|\'\') .) ?\/ ?(\d{1,3}° \d{2}(?:'||\'\') \d{2},?\d{0,3}(?:\"|”) .)"
)
# Same for a single-polygon description.
# NOTE(review): this pattern is slightly more permissive than the
# multi-polygon one (decimal separator, seconds mark, optional space before
# the hemisphere); the difference is kept as found - confirm it is intended.
_SINGLE_POLYGON_POINT_RE = re.compile(
    r".{0,35} ?:? ?.? ?: ?(\d{1,3}° \d{2}(?:'|) \d{2}(?:,|.)?\d{0,3}(?:\"|”|\'\') ?.) ?\/ ?(\d{1,3}° \d{2}(?:'||\'\') \d{2}(?:,|.)?\d{0,3}(?:\"|”|\'\') ?.)"
)
# Groups: 6-character label, radius with its unit, first and second coordinate.
_CIRCLE_RE = re.compile(
    r"(.{6}) .{0,5}?(\d{1,3},?\d{0,2} ?.{1,2}) .{1,35} ?(\d{3}° \d{1,2}' \d{1,2}” .) ?\/ ?(\d{1,3}° \d{1,2}' \d{1,2}” .)",
    re.S,
)
# Leading number (optionally with a decimal comma) of a radius such as "1,5 km".
_RADIUS_RE = re.compile(r"(\d*,?\d*).*")


def _pairs_to_dd(point_pairs):
    """Convert every (coordinate, coordinate) text pair to a list of two decimal values."""
    return [[dms2dd(point) for point in pair] for pair in point_pairs]


def _parse_circle(zone_text):
    """Return ``[label, radius, coordinate, coordinate]`` for a circle description."""
    match = _CIRCLE_RE.search(zone_text)
    if match is None:
        raise ValueError(f"unrecognised circle description: {zone_text!r}")
    label, radius_text, first, second = match.groups()
    radius = float(_RADIUS_RE.search(radius_text).group(1).replace(",", "."))
    if "km" in radius_text:
        # Kilometres are scaled by 1000; any other unit is kept as written.
        radius *= 1000
    return [label, radius, dms2dd(first), dms2dd(second)]


def _parse_zone(zone_text):
    """Turn a lower-cased zone description into a geometry, or return it unchanged."""
    # "polygones" must be tested before "polygone", which it contains.
    if "polygones" in zone_text:
        # Whatever precedes the first header line is not a polygon.
        chunks = _POLYGON_HEADER_RE.split(zone_text)[1:]
        return [_pairs_to_dd(_MULTI_POLYGON_POINT_RE.findall(chunk)) for chunk in chunks]
    if "polygone" in zone_text:
        return [_pairs_to_dd(_SINGLE_POLYGON_POINT_RE.findall(zone_text))]
    if "cercle" in zone_text:
        return [_parse_circle(zone_text)]
    if zone_text.strip() == "atolls et eaux territoriales incluses":
        # This description carries no coordinates, so two fixed circles are used.
        return [("cercle", 35000.0, -138.9022, -21.82917), ("cercle", 27000, -138.7425, -22.23528)]
    return zone_text


def fetch(url='https://www.legifrance.gouv.fr/eli/arrete/2018/10/12/PRMD1824595A/jo/texte'):
    """Fetch the decree table and convert each ``zone`` description to numbers.

    The rows come from ``runfetch``. In every field, ``<br>``, ``<br/>`` and
    ``<br />`` are replaced by a newline. The ``zone`` field is then
    lower-cased and replaced by:

    * several polygons: a list of polygons, each a list of two-element lists
      of decimal coordinates (as returned by ``dms2dd``);
    * one polygon: the same structure holding a single polygon;
    * a circle: ``[[label, radius, coordinate, coordinate]]``, the radius
      being multiplied by 1000 when expressed in km;
    * the atolls entry: two hard-coded ``("cercle", radius, x, y)`` tuples;
    * anything else: the lower-cased text itself.

    Args:
        url: Address of the page to download.

    Returns:
        The list of row dictionaries, modified in place.

    Raises:
        ValueError: If a description mentions a circle but cannot be parsed.
    """
    diclist = runfetch(url=url)
    for record in diclist:
        for key in record:
            record[key] = _BR_TAG_RE.sub("\n", record[key])  # Replace <br>, <br /> and <br/> by \n
    # Build the area geometry (matching is case insensitive)
    for area in diclist:
        area["zone"] = _parse_zone(area["zone"].lower())
    return diclist
if __name__ == '__main__':
    # Fetch all the data from a French article
    diclist = fetch()
    # Dump every record, one field per line (keep this code at the end of the file)
    print("===== Dict =====")
    for index, area in enumerate(diclist):
        print(f"\n-------------{index}----------------\n")
        for element, value in area.items():
            print(f" {element}: {value}")
        print("\n")