app.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
"""Download the NASA breaking-news RSS feed and export its items to CSV."""

import csv
import xml.etree.ElementTree as ET

import requests

RSS_URL = 'https://www.nasa.gov/rss/dyn/breaking_news.rss'
XML_FILE = 'nasa_news.xml'
CSV_FILE = 'nasa_news.csv'
REQUEST_TIMEOUT = 30  # seconds; requests waits forever when no timeout is given


def loadRSS(url=RSS_URL, filename=XML_FILE, timeout=REQUEST_TIMEOUT):
    """Fetch the RSS document at *url* and save the raw bytes to *filename*.

    Args:
        url: Address of the RSS feed to download.
        filename: Path of the local file the response body is written to.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    resp = requests.get(url, timeout=timeout)
    # Fail here instead of saving an HTTP error page as if it were the feed.
    resp.raise_for_status()
    # Write the undecoded bytes so the XML parser can honour the encoding
    # declared inside the document itself.
    with open(filename, 'wb') as f:
        f.write(resp.content)


def parseXML(xmlfile):
    """Parse an RSS file and return its ``<item>`` elements as dictionaries.

    Args:
        xmlfile: Path (or file object) of the RSS XML document.

    Returns:
        A list with one dict per ``./channel/item`` element, mapping each
        child element's tag to its text. Children without text map to ``''``.
        When a tag repeats inside one item, the last occurrence wins.
    """
    root = ET.parse(xmlfile).getroot()
    newsitems = []
    for item in root.findall('./channel/item'):
        # Keep values as str: encoding to bytes here would make the csv
        # module write their repr (b'...') into the output file.
        news = {child.tag: child.text or '' for child in item}
        newsitems.append(news)
    return newsitems


def savetoCSV(newsitems, filename):
    """Write *newsitems* to *filename* as a UTF-8 CSV file with a header row.

    Args:
        newsitems: Iterable-safe list of dicts as returned by ``parseXML``.
        filename: Path of the CSV file to create or overwrite.

    Columns are the union of all keys, in the order they are first seen, so
    the layout is the same on every run. Items lacking a column get an empty
    cell (the ``csv.DictWriter`` default).
    """
    columns = {}
    for item in newsitems:
        # dict.update keeps the position of keys that are already present,
        # so this accumulates keys in first-seen order.
        columns.update(dict.fromkeys(item))
    fieldnames = list(columns)

    # newline='' is required by the csv module to avoid doubled line endings.
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(newsitems)


def main():
    """Download the feed, parse it, and write the items to the CSV file."""
    loadRSS()
    newsitems = parseXML(XML_FILE)
    savetoCSV(newsitems, CSV_FILE)


if __name__ == "__main__":
    main()