Welcome to ShenZhenJia Knowledge Sharing Community for programmer and developer-Open, Learning and Share
menu search
person
Welcome To Ask or Share your Answers For Others

Categories

I have a CasperJS script that scrapes ratings and dates from one webpage. Now I want to scrape the same data from multiple pages under the same website. How can I loop through the different subpages given this code:

var ratings = [];
var dates = [];
var casper = require('casper').create({

    pageSettings: {
        loadImages:  false,         
        loadPlugins: false          
    },
    logLevel: "debug",             
    verbose: true                   
});

var fs = require('fs');

function getRatings() {
    var ratings = document.querySelectorAll('#BVRRRatingOverall_Review_Display > div.BVRRRatingNormalImage > img');
    return Array.prototype.map.call(ratings, function(e) {
        return e.getAttribute('title');
    });
}

function getDate() {
    var dates = document.querySelectorAll('#BVSubmissionPopupContainer > div.BVRRReviewDisplayStyle5Header > div.BVRRReviewDateContainer > span.BVRRValue.BVRRReviewDate');

    return Array.prototype.map.call(dates, function(e) {

        return e.innerHTML;

    });
}

casper.start('http://www.t-mobile.com/cell-phones/samsung-galaxy-s-5.html?bvrrp=9060/reviews/product/1/598aea53-16d0-4c12-b53a-105157092c52.htm', function(){

    this.echo('hi');
});

casper.then(function() {

    ratings = this.evaluate(getRatings);
    dates = this.evaluate(getDate);

    this.echo(ratings);
});


casper.run(function() {

    this.echo(ratings.length + ' ratings found:');

     for(var i=0; i<ratings.length; i++){
        ratings[i] = ratings[i]+': '+dates[i];
        dates[i] = '';
     }
    this.echo(ratings);
    var content = ratings;

    content = content.join("
");

    fs.write("C:/Users/Karan/Copy/tweesis/implementation/scraping/samsungratings.txt", content, 'w'); 

    this.echo(dates.length + ' dates found:').exit();



});

Any help is appreciated :)

See Question&Answers more detail:os

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
thumb_up_alt 0 like thumb_down_alt 0 dislike
211 views
Welcome To Ask or Share your Answers For Others

1 Answer

Since there exists a next page button, you can use it to traverse all pages recursively:

function getRatingsAndWrite(){
    ratings = casper.evaluate(getRatings);
    dates = casper.evaluate(getDate);

    casper.echo(ratings);
    casper.echo(ratings.length + ' ratings found:');

    for(var i=0; i<ratings.length; i++){
        ratings[i] = ratings[i]+': '+dates[i];
        dates[i] = '';
    }
    casper.echo(ratings);
    var content = ratings;

    content = content.join("
");

    fs.write("C:/Users/Karan/Copy/tweesis/implementation/scraping/samsungratings.txt", content, 'a'); 

    casper.echo(dates.length + ' dates found:');

    var nextLink = ".BVRRPageLink.BVRRNextPage > a";
    if (casper.visible(nextLink)) {
        casper.thenClick(nextLink);
        casper.then(getRatingsAndWrite);
    } else {
        casper.echo("END")
    }
}

casper.start('http://www.t-mobile.com/cell-phones/samsung-galaxy-s-5.html?bvrrp=9060/reviews/product/1/598aea53-16d0-4c12-b53a-105157092c52.htm');

casper.then(getRatingsAndWrite);

casper.run();

A related answer is A: CasperJS parse next page after button click.


与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
thumb_up_alt 0 like thumb_down_alt 0 dislike
Welcome to ShenZhenJia Knowledge Sharing Community for programmer and developer-Open, Learning and Share
...