2018-11-05 22:50:53 +01:00
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
"""
|
|
|
|
|
Created on Mon Nov 5 20:55:36 2018
|
|
|
|
|
|
2018-11-11 14:22:57 +01:00
|
|
|
|
@author: suwako & thedevkiller
|
2018-11-05 22:50:53 +01:00
|
|
|
|
"""
|
2018-11-06 22:13:18 +01:00
|
|
|
|
|
2018-11-05 22:50:53 +01:00
|
|
|
|
import requests
|
2018-11-07 23:32:46 +01:00
|
|
|
|
import re
|
2018-11-06 22:13:18 +01:00
|
|
|
|
|
|
|
|
|
|
2018-11-11 00:32:33 +01:00
|
|
|
|
# Variables
# Base URL of the OpenStreetMap API. This currently points at the testing
# instance; it should be replaced by https://api.openstreetmap.org/ when the
# program is finished.
api = "https://master.apis.dev.openstreetmap.org/"
|
2018-11-11 00:32:33 +01:00
|
|
|
|
|
|
|
|
|
|
2018-11-11 09:39:34 +01:00
|
|
|
|
def runfetch(url='https://www.legifrance.gouv.fr/eli/arrete/2018/10/12/PRMD1824595A/jo/texte', timeout=30):
    """Download the decree page and extract the rows of its "ANNEXE I" table.

    Args:
        url: Address of the Legifrance page to scrape.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with one dict per table row, using the keys ``id``,
        ``commune``, ``site``, ``departement``, ``zone``, ``ministere`` and
        ``aerozone``. Values are the raw cell strings (HTML line breaks are
        still present at this stage).

    Raises:
        requests.RequestException: the download failed, timed out or
            returned an HTTP error status.
        ValueError: one of the markers delimiting the table, or a closing
            ``</tr>`` line, was not found in the page.
        IndexError: a row has fewer than seven cells.
    """
    # The context manager closes the session (and its pooled connections)
    # even when the request fails.
    with requests.Session() as session:
        response = session.get(url, timeout=timeout)
        # Fail here with a clear HTTP error rather than later with a
        # confusing "marker not in list" error on an error page.
        response.raise_for_status()
        lines = response.text.split('\n')

    # Parse the HTML source code.
    # Keep only the lines between the two markers that surround the table;
    # both markers must match a whole line exactly.
    start = lines.index('<br/>ANNEXES<br/>ANNEXE I</p>')
    end = lines.index('<div style="margin-top: 30px; margin-bottom:20px;" id="JORFSCTA000037493059" class="titreSection">Annexe </div>')
    section = '\n'.join(lines[start:end])

    keys = ("id", "commune", "site", "departement", "zone", "ministere", "aerozone")
    diclist = []
    # The first two '<tr>' chunks are skipped (presumably the text before the
    # table and its header row -- TODO confirm against the page).
    for chunk in section.split('<tr>')[2:]:
        chunk_lines = chunk.split('\n')
        # Drop everything from the closing "</tr>" line onwards.
        row = '\n'.join(chunk_lines[:chunk_lines.index("</tr>")])

        cols = []
        for cell in row.split("</td"):
            cell_lines = cell.split('\n')
            # A cell is only used when it spans exactly three lines; its
            # content is the third line minus its first five characters
            # (presumably leading markup -- TODO confirm). Anything else
            # becomes an empty string.
            cols.append(cell_lines[2][5:] if len(cell_lines) == 3 else "")

        # dicformat:{id,commune,site,departement,zone,ministere,aerozone}
        diclist.append({key: cols[position] for position, key in enumerate(keys)})

    return diclist
|
2018-11-06 22:13:18 +01:00
|
|
|
|
|
2018-11-11 09:39:34 +01:00
|
|
|
|
|
2018-11-11 00:32:33 +01:00
|
|
|
|
# Degrees, minutes and seconds of one coordinate. Written as a raw string so
# that the regex escapes are not treated as (invalid) Python string escapes.
# NOTE(review): the unescaped "." accepts any character as decimal separator;
# kept as-is so that inputs accepted so far keep being accepted.
_DMS_RE = re.compile(r"(\d{1,3})° (\d{2})(?:'|′) (\d{2}(?:,|.)?\d{0,3})(?:\"|”|'')")


def dms2dd(dms):
    """Convert a degrees/minutes/seconds coordinate to decimal degrees.

    The first coordinate found in *dms* is used, e.g. ``48° 51' 24" N``.
    A comma is accepted as decimal separator for the seconds. Anything after
    the seconds mark (such as a hemisphere letter) is ignored, so the result
    is never negated.

    Args:
        dms: Text containing the coordinate.

    Returns:
        The coordinate as a float: degrees + minutes/60 + seconds/3600.

    Raises:
        ValueError: no coordinate was found in *dms*, or the seconds part
            is not a valid number.
    """
    match = _DMS_RE.search(dms)
    if match is None:
        raise ValueError(f"no DMS coordinate found in {dms!r}")
    degrees, minutes, seconds = (
        float(part.replace(",", ".")) for part in match.groups()
    )
    return degrees + minutes / 60 + seconds / 3600
|
2018-11-06 22:13:18 +01:00
|
|
|
|
|
2018-11-11 00:32:33 +01:00
|
|
|
|
|
2018-11-11 09:39:34 +01:00
|
|
|
|
# Patterns used by fetch(). The ones containing regex escapes are raw strings
# so that "\d" and "\/" are not processed as (invalid) Python string escapes.
_BR_TAG_RE = re.compile("<br.{0,2}>")  # <br>, <br /> and <br/>
_ZONE_HEADING_RE = re.compile(".*zone.*")  # a whole line mentioning "zone"
_MULTI_POLYGON_POINT_RE = re.compile(r".{0,35} ?:? ?.? ?: ?(\d{1,3}° \d{2}(?:'|′) \d{2},?\d{0,3}(?:\"|”|\'\') .) ?\/ ?(\d{1,3}° \d{2}(?:'|′|\'\') \d{2},?\d{0,3}(?:\"|”) .)")
_POLYGON_POINT_RE = re.compile(r".{0,35} ?:? ?.? ?: ?(\d{1,3}° \d{2}(?:'|′) \d{2}(?:,|.)?\d{0,3}(?:\"|”|\'\') ?.) ?\/ ?(\d{1,3}° \d{2}(?:'|′|\'\') \d{2}(?:,|.)?\d{0,3}(?:\"|”|\'\') ?.)")
_CIRCLE_RE = re.compile(r"(.{6}) .{0,5}?(\d{1,3},?\d{0,2} ?.{1,2}) .{1,35} ?(\d{3}° \d{1,2}' \d{1,2}” .) ?\/ ?(\d{1,3}° \d{1,2}' \d{1,2}” .)", re.S)
_KM_VALUE_RE = re.compile(r"(\d*,?\d*).*")


def _polygons_to_decimal(polygons):
    """Convert polygons of DMS string pairs into lists of decimal-degree pairs."""
    return [
        [[dms2dd(point) for point in pair] for pair in polygon]
        for polygon in polygons
    ]


def fetch(url='https://www.legifrance.gouv.fr/eli/arrete/2018/10/12/PRMD1824595A/jo/texte'):
    """Fetch the decree table and turn each "zone" description into data.

    The rows come from ``runfetch``. In every value, HTML line breaks are
    replaced by newlines. The ``zone`` text is then lowercased and replaced,
    depending on what it mentions:

    * "polygones": a list of polygons, one per section following a line
      containing "zone"; each polygon is a list of ``[first, second]``
      coordinate pairs in decimal degrees.
    * "polygone": the same structure holding a single polygon.
    * "cercle": ``[[label, radius, first, second]]`` where the coordinates
      are decimal degrees and the radius is a float in metres when it was
      given in km, otherwise the matched text unchanged.
    * exactly "atolls et eaux territoriales incluses": a hard-coded list of
      two circle tuples.
    * anything else: the lowercased text.

    Args:
        url: Address of the page to scrape, forwarded to ``runfetch``.

    Returns:
        The list of row dicts with the converted ``zone`` values.

    Raises:
        ValueError: a circle description or a coordinate could not be parsed.
    """
    diclist = runfetch(url=url)

    # Replace <br>, <br /> and <br/> by \n in every field.
    for row in diclist:
        for key, value in row.items():
            row[key] = _BR_TAG_RE.sub("\n", value)

    # Build the area description of every row.
    for area in diclist:
        # Case insensitive matching: work on the lowercased text.
        description = area["zone"].lower()
        area["zone"] = description

        # Multiple polygons ("polygones" must be tested before "polygone",
        # which it contains).
        if "polygones" in description:
            # Each line mentioning "zone" starts a new polygon; the text
            # before the first such line is discarded.
            sections = _ZONE_HEADING_RE.split(description)[1:]
            area["zone"] = _polygons_to_decimal(
                [_MULTI_POLYGON_POINT_RE.findall(section) for section in sections]
            )

        # Single polygon
        elif "polygone" in description:
            area["zone"] = _polygons_to_decimal(
                [_POLYGON_POINT_RE.findall(description)]
            )

        # Circles
        elif "cercle" in description:
            match = _CIRCLE_RE.search(description)
            if match is None:
                raise ValueError(f"cannot parse circle description {description!r}")
            # Groups: six-character label (presumably "cercle"), radius
            # text, then the two centre coordinates.
            circle = list(match.groups())
            if "km" in circle[1]:
                # Kilometres (decimal comma) to metres.
                circle[1] = float(_KM_VALUE_RE.search(circle[1]).group(1).replace(",", ".")) * 1000
            # NOTE(review): this pattern accepts one-digit minutes/seconds
            # while dms2dd requires two digits -- confirm against the data.
            circle[2] = dms2dd(circle[2])
            circle[3] = dms2dd(circle[3])
            area["zone"] = [circle]

        # Atolls
        elif description.strip() == "atolls et eaux territoriales incluses":
            # NOTE(review): hard-coded values of mixed types ("27KM",
            # "22.23528A") -- looks like placeholder data; confirm.
            area["zone"] = [("cercle", 3500.0, "138.9022", "21.82917"), ("cercle", "27KM", "138.7425", "22.23528A")]

    return diclist
|
2018-11-11 00:32:33 +01:00
|
|
|
|
|
|
|
|
|
|
2018-11-11 09:39:34 +01:00
|
|
|
|
if __name__ == '__main__':
    # Script entry point: download and parse the decree, then dump one entry
    # of the result for manual inspection.
    diclist = fetch() # Fetch all the data from a French article

    # Print the dict (keep this code at the end of the file)
    print("===== Dict =====")
    for index, area in enumerate(diclist):
        # Only the entry at position 129 is printed.
        if index == 129:
            print(f"\n-------------{index}----------------\n")
            # One "key: value" line per field of the entry.
            for element in area:
                print(f" {element}: {diclist[index][element]}")
            print("\n")
|