Performs a synchronization. Articles that are already synchronized aren't touched anymore.
()
| 15 | |
| 16 | |
| 17 | def sync(): |
| 18 | """ |
| 19 | Performs a synchronization. Articles that are already synchronized aren't |
| 20 | touched anymore. |
| 21 | """ |
| 22 | for blog in Blog.query.all(): |
| 23 | # parse the feed. feedparser.parse will never raise an exception |
| 24 | # but the bozo bit might be set. |
| 25 | feed = feedparser.parse(blog.feed_url) |
| 26 | |
| 27 | for entry in feed.entries: |
| 28 | # get the guid. either the id if specified, otherwise the link. |
| 29 | # if none is available we skip the entry. |
| 30 | guid = entry.get("id") or entry.get("link") |
| 31 | if not guid: |
| 32 | continue |
| 33 | |
| 34 | # get an old entry for the guid to check if we need to update |
| 35 | # or recreate the item |
| 36 | old_entry = Entry.query.filter_by(guid=guid).first() |
| 37 | |
| 38 | # get title, url and text. skip if no title or no text is |
| 39 | # given. if the link is missing we use the blog link. |
| 40 | if "title_detail" in entry: |
| 41 | title = entry.title_detail.get("value") or "" |
| 42 | if entry.title_detail.get("type") in HTML_MIMETYPES: |
| 43 | title = strip_tags(title) |
| 44 | else: |
| 45 | title = escape(title) |
| 46 | else: |
| 47 | title = entry.get("title") |
| 48 | url = entry.get("link") or blog.blog_url |
| 49 | text = ( |
| 50 | entry.content[0] if "content" in entry else entry.get("summary_detail") |
| 51 | ) |
| 52 | |
| 53 | if not title or not text: |
| 54 | continue |
| 55 | |
| 56 | # if we have an html text we use that, otherwise we HTML |
| 57 | # escape the text and use that one. We also handle XHTML |
| 58 | # with our tag soup parser for the moment. |
| 59 | if text.get("type") not in HTML_MIMETYPES: |
| 60 | text = escape(nl2p(text.get("value") or "")) |
| 61 | else: |
| 62 | text = text.get("value") or "" |
| 63 | |
| 64 | # no text? continue |
| 65 | if not text.strip(): |
| 66 | continue |
| 67 | |
| 68 | # get the pub date and updated date. This is rather complex |
| 69 | # because different feeds do different stuff |
| 70 | pub_date = ( |
| 71 | entry.get("published_parsed") |
| 72 | or entry.get("created_parsed") |
| 73 | or entry.get("date_parsed") |
| 74 | ) |