updated specific configuration for parsing

This commit is contained in:
Nicolas Lœuillet
2014-07-13 10:15:40 +02:00
parent 58dbe10388
commit 4e067ceabd
952 changed files with 7585 additions and 5682 deletions

12
inc/3rdparty/site_config/standard/informit.com.txt vendored Normal file → Executable file
View File

@@ -1,7 +1,7 @@
title: //div[@id='content']/h1
body: //div[@id="content"]
strip: //img[contains(@src, 'informit_printer.png')]
single_page_link: //div[contains(@class, 'articleTools')]//a[contains(@href, '/printerfriendly.')]
prune: no
title: //div[@id='content']/h1
body: //div[@id="content"]
strip: //img[contains(@src, 'informit_printer.png')]
single_page_link: //div[contains(@class, 'articleTools')]//a[contains(@href, '/printerfriendly.')]
prune: no
test_url: http://www.informit.com/articles/article.aspx?p=1729268