A Node.js web scraper that downloads books from Safari Books Online (safaribooksonline.com).

test2.js 2.2KB

    /**
     * Safari Books Online scraper.
     *
     * Drives a visible Electron window through Nightmare to:
     *   1. log in with the credentials found in the SAFARI_EMAIL and
     *      SAFARI_PASSWORD environment variables,
     *   2. open the book's landing page and save it as index.pdf,
     *   3. collect the chapter links from the table of contents,
     *   4. visit each collected link in turn, strip the page down to the
     *      '#sbo-rt-content' element, inject a print stylesheet and save the
     *      result as "<index>- <title>.pdf" in the current directory.
     *
     * The script relies on the site's own jQuery ($) being present in the page.
     *
     * Usage:
     *   SAFARI_EMAIL=you@example.com SAFARI_PASSWORD=secret node test2.js
     */
    var Nightmare = require('nightmare');

    var BASE_URL = 'https://www.safaribooksonline.com';
    var LOGIN_URL = BASE_URL + '/accounts/login/';
    var BOOK_URL = BASE_URL + '/library/view/learning-robotics-using/9781783287536/';
    var LOGIN_FORM = 'form[action*="/accounts/login/"]';

    // NOTE(review): the original loop started at index 1, so the first collected
    // link is never captured. That may be intentional (index.pdf already covers
    // the landing page) or an off-by-one -- confirm before changing it to 0.
    var FIRST_PAGE_INDEX = 1;

    // Options handed to Nightmare's .pdf(); shared by the index and chapter PDFs.
    var PDF_OPTIONS = {
      marginsType: 1,
      printBackground: false,
      printSelectionOnly: false,
      landscape: false,
      pageSize: "A4"
    };

    // Print stylesheet appended to <head> of every chapter page before printing.
    var PRINT_STYLE = '<style type="text/css" media="print"><!-- ' +
      '@page { size: auto; margin: 25mm;} ' +
      '@media print { body {margin: 25mm;} } ' +
      'body { margin: 0px; padding: 0px;} ' +
      '#sbo-rt-content { max-width: 90%;} --></style>';

    // Credentials come from the environment so that no secret lives in the source.
    var email = process.env.SAFARI_EMAIL;
    var password = process.env.SAFARI_PASSWORD;
    if (!email || !password) {
      throw new Error('Set the SAFARI_EMAIL and SAFARI_PASSWORD environment variables before running.');
    }

    var nightmare = Nightmare({ show: true });

    /**
     * Turns a page title into something usable as a file name.
     *
     * Whitespace runs (including newlines) collapse to one space, and characters
     * that are path separators or otherwise illegal in common file systems are
     * replaced with '_'.
     *
     * @param {string} title - Raw text of the page heading.
     * @returns {string} The cleaned-up title.
     */
    function toSafeFileName(title) {
      return String(title)
        .replace(/\s+/g, ' ')
        .trim()
        .replace(/[\\\/:*?"<>|\x00-\x1f]/g, '_');
    }

    /**
     * Captures links[i] as a PDF, then recurses to the next index.
     *
     * Each step returns its promise, so the queued .pdf()/.wait() actions of
     * every page -- including the last one -- are actually executed before the
     * chain resolves.
     *
     * @param {string[]} links - Site-relative chapter URLs.
     * @param {number} i - Index of the link to capture.
     * @returns {Promise} Resolves once links[i] and all later links are saved.
     */
    function capturePage(links, i) {
      if (i >= links.length) {
        return Promise.resolve();
      }

      console.log("Capturing page " + i + ":");
      console.log(links[i]);

      return nightmare
        .viewport(1200, 800)
        .goto(BASE_URL + links[i])
        .wait(1000)
        .evaluate(function (printStyle) {
          // Runs inside the page: keep only the book content and add print CSS.
          console.log("> Copying content to body");
          var $content = $('#sbo-rt-content').clone();
          $('body').html($content);
          console.log("> Changing print CSS");
          $('head').append(printStyle);
          return $('h1.title').text();
        }, PRINT_STYLE)
        .then(function (title) {
          console.log(title);
          return nightmare
            .pdf(i + "- " + toSafeFileName(title) + ".pdf", PDF_OPTIONS)
            .wait(2000)
            .then(function () {
              return capturePage(links, i + 1);
            });
        });
    }

    nightmare
      .viewport(1200, 800)
      .goto(LOGIN_URL)
      .type(LOGIN_FORM + ' [name=email]', email)
      .type(LOGIN_FORM + ' [name=password1]', password)
      .click(LOGIN_FORM + ' [type=submit]')
      .wait(1000)
      .goto(BOOK_URL)
      .wait(100)
      .pdf('index.pdf', PDF_OPTIONS)
      .evaluate(function () {
        // Runs inside the page: first link of every nested TOC list.
        var links = [];
        $(".detail-toc li ol").each(function () {
          var href = $(this).find('a').attr('href');
          if (href) {
            links.push(href);
          }
        });
        return links;
      })
      .then(function (links) {
        console.log("Found " + links.length + " links.");
        return capturePage(links, FIRST_PAGE_INDEX);
      })
      .catch(function (err) {
        console.error("Scrape failed:", err);
        process.exitCode = 1;
      })
      .then(function () {
        // Always shut Electron down; .end() only runs once .then() is called.
        return nightmare.end().then(function () {});
      })
      .catch(function (err) {
        console.error("Failed to shut down Nightmare:", err);
        process.exitCode = 1;
      });