From b3788fe34d42582e67577c198fa968c6793dae09 Mon Sep 17 00:00:00 2001
From: TheDevKiller
Date: Wed, 7 Nov 2018 23:32:46 +0100
Subject: [PATCH] Added code to replace <br> by \n in positions
---
fetch.py | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/fetch.py b/fetch.py
index 3675636..d34a979 100755
--- a/fetch.py
+++ b/fetch.py
@@ -7,12 +7,14 @@ Created on Mon Nov 5 20:55:36 2018
"""
import requests
+import re
def runfetch(url):
session=requests.Session()
- req1=session.get(url)
+ req1=session.get(url) # Get the source code
text=req1.text.split('\n')
+ # Parse the HTML source code
text=['\n'.join(row.split('\n')[:row.split('\n').index(""):]) for row in '\n'.join(text[text.index('
ANNEXES
ANNEXE I
'):text.index('Annexe
'):]).split('')[2::]]
diclist=[]
for row in text:
@@ -23,8 +25,11 @@ def runfetch(url):
if __name__ == '__main__':
- diclist=runfetch('https://www.legifrance.gouv.fr/eli/arrete/2018/10/12/PRMD1824595A/jo/texte')
+ diclist=runfetch('https://www.legifrance.gouv.fr/eli/arrete/2018/10/12/PRMD1824595A/jo/texte') # Fetch all the data from a french article
url='https://www.legifrance.gouv.fr/eli/arrete/2018/10/12/PRMD1824595A/jo/texte'
+ for index, zone in enumerate(diclist):
+ for element in zone:
+ diclist[index][element] = re.sub("", "\n", diclist[index][element]) # Replace <br>, <br/> and <br /> by \n
# Print the dict
print("===== Dict =====")
for index, zone in enumerate(diclist):