|
var Nightmare = require('nightmare');
// Single Nightmare browser instance used for the whole run; `show: true`
// asks Nightmare to display the browser window while it works.
var nightmare = Nightmare({ show: true });

// PDF print settings: A4, portrait, no background graphics, whole page.
// NOTE(review): marginsType 1 presumably means "no margins" in Electron's
// printToPDF options — confirm against the Electron version in use.
var options = {
  pageSize: "A4",
  landscape: false,
  marginsType: 1,
  printBackground: false,
  printSelectionOnly: false
};
// Command-line interface: node <script> <start_link> [start_page] [wait_time_ms]
//   start_link  URL of the first page to capture (required)
//   start_page  number given to the first PDF (default 1)
//   wait_time   milliseconds to wait before printing each page (default 2000)
var start_link = process.argv[2];
var start_page = parseInt(process.argv[3], 10);
var wait_time = parseInt(process.argv[4], 10);

var SITE_ROOT = "https://www.safaribooksonline.com";
var LOGIN_FORM = 'form[action*="/accounts/login/"]';

/**
 * Build the output path of the PDF for one captured page.
 *
 * Path separators in the title are replaced with "-" so that a title such as
 * "TCP/IP" cannot point into a (non-existent) sub-directory of book/.
 * Nothing in this script creates the book/ directory itself.
 *
 * @param {number} i - Page number used as the file-name prefix.
 * @param {string} title - Page title as read from the document.
 * @returns {string} Relative path of the PDF file.
 */
function buildPdfPath(i, title) {
  return "book/" + i + "- " + String(title).replace(/[\/\\]/g, "-") + ".pdf";
}

/**
 * Capture one page as a PDF, then follow its "next" link until none is left.
 *
 * Every Nightmare chain is finished with .then() and returned: Nightmare only
 * runs queued actions once .then() is called, so returning the promises makes
 * the ordering explicit (print before navigating on) and lets errors reach the
 * caller's .catch().
 *
 * @param {number} i - Number of the page being captured.
 * @param {string} link - Absolute URL of the page.
 * @returns {Promise} Settles after the last page was saved and Nightmare ended.
 */
function getPage(i, link) {
  console.log("Capturing page " + i + ":");
  return nightmare
    .viewport(1200, 800)
    .goto(link)
    .wait(1000)
    // Runs inside the page (it relies on the page's own jQuery `$`), so it
    // must not reference anything from this script's scope.
    .evaluate(function (pageNumber) {
      var link = $(".next.nav-link").attr('href');
      var title = $('h1.title').text();
      // Strip the page down to the book content and inject print margins.
      var $content = $('#sbo-rt-content').clone();
      $('body').html($content);
      $('head').append('<style type="text/css" media="print"><!-- @page { size: auto; margin: 25mm;} @media print { body {margin: 25mm;} } body { margin: 0px; padding: 0px;} #sbo-rt-content { max-width: 90%;} --></style>');
      return [pageNumber, title, link];
    }, i)
    .then(function (result) {
      var pageNumber = result[0];
      var title = result[1];
      var next = result[2];
      console.log("> title: " + title);
      console.log("> next: " + next);
      return nightmare
        .wait(wait_time)
        .pdf(buildPdfPath(pageNumber, title), options)
        .then(function () {
          // `!= null` on purpose: a missing link may arrive as null or undefined.
          if (next != null) {
            return getPage(pageNumber + 1, SITE_ROOT + next);
          }
          // .end() is only executed once .then() is called on it.
          return nightmare.end().then(function () {});
        });
    });
}

if (start_link) {
  // A missing or non-numeric argument parses to NaN, which is falsy (as is 0),
  // so both cases fall back to the defaults.
  if (!start_page) {
    start_page = 1;
  }
  if (!wait_time) {
    wait_time = 2000;
  }

  // FIXME(security): the fallback credentials below are hard-coded in source.
  // Prefer SAFARI_EMAIL / SAFARI_PASSWORD from the environment, then remove
  // the literals from the repository and rotate the password.
  var email = process.env.SAFARI_EMAIL || 'james.peret@gmail.com';
  var password = process.env.SAFARI_PASSWORD || 'woodstock512';

  nightmare
    .viewport(1200, 800)
    .goto(SITE_ROOT + '/accounts/login/')
    .type(LOGIN_FORM + ' [name=email]', email)
    .type(LOGIN_FORM + ' [name=password1]', password)
    .click(LOGIN_FORM + ' [type=submit]')
    .wait(1000)
    .then(function () {
      return getPage(start_page, start_link);
    })
    .catch(function (err) {
      console.error("Scraping failed:", err);
      process.exitCode = 1;
      // Best effort: still shut the browser down; a failure while closing
      // is deliberately ignored because the original error was reported.
      return nightmare.end().then(function () {}, function () {});
    });
} else {
  console.log("Please provide a link for the scraping starting point.");
}
|