updated specific configuration for parsing

This commit is contained in:
Nicolas Lœuillet
2014-07-13 10:15:40 +02:00
parent 58dbe10388
commit 4e067ceabd
952 changed files with 7585 additions and 5682 deletions

32
inc/3rdparty/site_config/standard/csmonitor.com.txt vendored Normal file → Executable file
View File

@ -1,18 +1,18 @@
title: //h1[contains(@class, 'head')]
# standard page
body: //div[@id='mainColumn']//div[contains(@class, 'list-article-full')]
# print page
body: //div[@id='mainColumn']
author: //a[contains(@class, 'ui-author')]
single_page_link: //div[@class='storyToolbar']//a[contains(@href, '/print/')]
strip_id_or_class: storyToolbar
strip_id_or_class: promotion-tag
tidy: no
prune: no
title: //h1[contains(@class, 'head')]
# standard page
body: //div[@id='mainColumn']//div[contains(@class, 'list-article-full')]
# print page
body: //div[@id='mainColumn']
author: //a[contains(@class, 'ui-author')]
single_page_link: //div[@class='storyToolbar']//a[contains(@href, '/print/')]
strip_id_or_class: storyToolbar
strip_id_or_class: promotion-tag
tidy: no
prune: no
test_url: http://www.csmonitor.com/World/Middle-East/2011/1108/Imminent-Iran-nuclear-threat-A-timeline-of-warnings-since-1979/Earliest-warnings-1979-84