0

I'm using Node, Mongoose and Bluebird and am working on a site that serves up several RSS feeds.

When a feed is retrieved, I want to pull any articles that I've already saved from my table and combine them with any new articles in the feed into a single array. I cannot seem to find a way to wait for all of the promises to resolve before returning my array. The basic problem is that when loadRSSFeed is called, it reaches the return articleList line almost immediately. How do I line all of this up so that nothing is returned until all of my promises are resolved?

This first function just sort of wraps everything:

/**
 * Walks every item of an RSS feed and starts a lookup/save for each one.
 *
 * NOTE: this is the version the question is about. There is no `return`
 * statement at function level, so callers receive `undefined`; the
 * `return articleList` below only returns from the inner `.then` callback.
 * `forEach` also does not wait for anything started inside its callback.
 *
 * @param {Object} rss - Parsed feed; items are read from `rss.channel[0].item`.
 * @param {Object} newsSource - Source descriptor, passed through to getArticle.
 */
function loadRSSFeed(rss, newsSource) {
// Accumulator handed to getArticle, which pushes into it from its query callback.
var articleList = [];

// Earlier attempt, left disabled:
// Promise.each(rss.channel[0].item, function (article) {
rss.channel[0].item.forEach(function (article) {
    var item = {};
    item.link = tryEval(article, "article.link[0]");

    // NOTE(review): `Promise` is invoked as a plain function (no `new`, no
    // resolve/reject parameters), and its result is discarded.
    Promise(function() {
        return getArticle(newsSource, article, item, articleList)
        .then(function() {
             return articleList; 
        })
    })
})                 
};          

The getArticle function looks like this:

/**
 * Looks up a feed item by link and pushes either the stored document or a
 * newly built article into `articleList`.
 *
 * The push happens inside the `findOne` callback. The `err` argument of that
 * callback is never inspected.
 *
 * @param {Object} newsSource - Provides `id` and `name` for new articles.
 * @param {Object} article - Raw feed item read through `tryEval`.
 * @param {Object} item - Object being populated; `item.link` is set by the caller.
 * @param {Array} articleList - Accumulator mutated by this function.
 * @returns Whatever `Articles.findOne` returns when given a callback.
 */
function getArticle(newsSource, article, item, articleList) {
return Articles.findOne({ link: article.link }, function (err, doc) {
    if (doc) {
        // Already stored: reuse the saved document's raw data.
        articleList.push(doc._doc);
    }
    else {
        item.title = tryEval(article, "article.title[0]");
        item.pubDate = tryEval(article, "article.pubDate[0]");
        item.sourceId = newsSource.id;
        item.sourceName = newsSource.name;

        if (item.pubDate) {
            // NOTE(review): `new Date(string)` yields an Invalid Date rather
            // than throwing, so this catch branch is not expected to run.
            try {
                item.pubDate = new Date(item.pubDate);
            }
            catch (err) {
                item.pubDate = "";
            }
        };

        item.contentSnippet = tryEval(article, "article.description[0]");
        // Keep only the text before the first "<"; the `- 1` also drops the
        // character immediately preceding it.
        if (item.contentSnippet.indexOf("<") > 0) {
            item.contentSnippet = item.contentSnippet.substring(0, item.contentSnippet.indexOf("<") - 1);
        };

        item.image = tryEval(article, "article['media:content'][0].$.url|article.thumbnail[0]");
        if (!item.image) {
            // Fall back to photoHunt; when it supplies an image the snippet is cleared.
            item.image = photoHunt(item);
            if (item.image) {
                item.contentSnippet = "";
            }
        };
        // Only store items that have a title, a link, and an image or a snippet.
        if (item.title && item.link && (item.image || item.contentSnippet)) {
            articleList.push(saveArticle(item));
        }
    }
})
} 

and the saveArticle function looks like this:

/**
 * Persists an article and returns the document object immediately.
 *
 * The `save` call is not awaited: the function returns `curArticle` before
 * the save callback has run, and a save error is only logged.
 *
 * @param {Object} article - Either an object with `_id` (saved as-is) or a
 *     plain item whose fields are copied onto a new `Articles` instance.
 * @returns {Object} The document that `save` was called on.
 */
function saveArticle(article) {
var curArticle = {};

if (article._id) {
    // Has an _id: save the passed object itself, flagged as not new.
    curArticle = article;
    curArticle._id = article._id;
    curArticle.isNew = false;
}
else {
    curArticle = new Articles();
    curArticle.title = article.title;
    curArticle.link = article.link;
    curArticle.pubDate = article.pubDate;
    curArticle.image = article.image;
    curArticle.contentSnippet = article.contentSnippet;
    // NOTE(review): getArticle sets `item.sourceName`, not `item.name`, so
    // this reads an unset property for items built there.
    curArticle.sourceName = article.name;
    curArticle.sourceId = article.sourceId;

    // Optional list fields are copied only when present.
    if (article.haters) {
        curArticle.haters = article.haters;
    };

    if (article.lovers) {
        curArticle.lovers = article.lovers;
    };

    if (article.readers) {
        curArticle.readers = article.readers;
    };
}
curArticle.save(function (err) {
    if (err)
        console.log(err);
});
return curArticle;
};

This is what the Articles model looks like, just in case there's an issue here:

// Mongoose model for stored feed articles.
var mongoose     = require('mongoose'), 
Schema       = mongoose.Schema;
// Disabled: NewsSchema = new Schema({ name: String });

// NOTE(review): saveArticle assigns `sourceId`, but no `sourceId` path is
// declared here — presumably it is not persisted under Mongoose's default
// strict mode; confirm and add the path if it should be stored.
var ArticlesSchema   = new Schema({
title: String,
link: String,
pubDate: Date,
image: String,
contentSnippet: String,
sourceName: String,
lovers: [],
haters: [],
readers: [],
forumLinks: []
});

// Exposes the compiled model under the name 'Articles'.
module.exports = mongoose.model('Articles', ArticlesSchema);

1 Answer 1

2

It looks to me like you can just use Bluebird's Promise.map() both to iterate through all the items in the RSS feed and to wait for all of them to finish:

/**
 * Resolves every item of an RSS feed to an article and returns them together.
 *
 * Uses Bluebird's `Promise.map` so that all per-item lookups/saves are started
 * and the returned promise only settles once every one of them has finished.
 *
 * @param {Object} rss - Parsed feed; items are read from `rss.channel[0].item`.
 * @param {Object} newsSource - Source descriptor passed through to getArticle.
 * @returns {Promise<Array>} Resolves with the articles, skipping items for
 *     which getArticle produced nothing. Rejects if any lookup or save fails.
 */
function loadRSSFeed(rss, newsSource) {
    return Promise.map(rss.channel[0].item, function (article) {
        var item = {};
        item.link = tryEval(article, "article.link[0]");
        // getArticle resolves with the article itself, so no shared
        // accumulator array is passed in (none exists in this scope).
        return getArticle(newsSource, article, item);
    }).then(function (articles) {
        // Drop the empty slots left by items that yielded no article.
        return articles.filter(function (article) {
            return !!article;
        });
    });
}


/**
 * Resolves one feed item to an article: the stored document if its link is
 * already in the Articles collection, otherwise a newly built and saved one.
 *
 * @param {Object} newsSource - Provides `id` and `name` for new articles.
 * @param {Object} article - Raw feed item, read through `tryEval`.
 * @param {Object} item - Object to populate; `item.link` is set by the caller.
 * @returns {Promise<Object|undefined>} The stored document data, the result of
 *     saveArticle for a new item, or `undefined` when the item has no link or
 *     lacks the fields required to be stored.
 */
function getArticle(newsSource, article, item) {
    // Without a link there is nothing to look up, and the item could never
    // pass the "has a link" requirement below.
    if (!item.link) {
        return Promise.resolve();
    }
    // Query by the same value that gets stored (item.link), and use the
    // query's own exec() promise instead of promisifying findOne on each call.
    // Promise.resolve() adopts it into this file's Promise implementation.
    return Promise.resolve(Articles.findOne({link: item.link}).exec()).then(function (doc) {
        if (doc) {
            return doc._doc;
        }

        item.title = tryEval(article, "article.title[0]");
        item.pubDate = tryEval(article, "article.pubDate[0]");
        item.sourceId = newsSource.id;
        item.sourceName = newsSource.name;

        if (item.pubDate) {
            // new Date() does not throw on unparseable input; it yields an
            // Invalid Date, which has to be detected explicitly.
            var parsedDate = new Date(item.pubDate);
            item.pubDate = isNaN(parsedDate.getTime()) ? "" : parsedDate;
        }

        item.contentSnippet = tryEval(article, "article.description[0]");
        // Keep only the text before the first "<" (markup); the description
        // may be missing, so check that it is a string first.
        if (typeof item.contentSnippet === "string" && item.contentSnippet.indexOf("<") > 0) {
            item.contentSnippet = item.contentSnippet.substring(0, item.contentSnippet.indexOf("<") - 1);
        }

        item.image = tryEval(article, "article['media:content'][0].$.url|article.thumbnail[0]");
        if (!item.image) {
            item.image = photoHunt(item);
            if (item.image) {
                item.contentSnippet = "";
            }
        }

        if (item.title && item.link && (item.image || item.contentSnippet)) {
            return saveArticle(item);
        }
        // Incomplete item: resolve with undefined so the caller can filter it out.
        return undefined;
    });
}

/**
 * Saves an article and resolves once the save has completed.
 *
 * @param {Object} article - Either an existing document (has `_id`), which is
 *     saved as-is, or a plain item whose fields are copied onto a new
 *     `Articles` document.
 * @returns {Promise<Object>} Resolves with the saved document; rejects with
 *     the error reported by `save`.
 */
function saveArticle(article) {
    // new Promise is used here only to adapt the callback-style save().
    return new Promise(function (resolve, reject) {
        var curArticle;
        if (article._id) {
            // Already a stored document: save it in place as an update.
            curArticle = article;
            curArticle.isNew = false;
        } else {
            curArticle = new Articles();
            curArticle.title = article.title;
            curArticle.link = article.link;
            curArticle.pubDate = article.pubDate;
            curArticle.image = article.image;
            curArticle.contentSnippet = article.contentSnippet;
            // Items built by getArticle carry the source name in `sourceName`;
            // `name` is kept as a fallback for callers that still pass it.
            curArticle.sourceName = article.sourceName !== undefined ? article.sourceName : article.name;
            curArticle.sourceId = article.sourceId;
            // Optional list fields are copied only when present.
            if (article.haters) {
                curArticle.haters = article.haters;
            }
            if (article.lovers) {
                curArticle.lovers = article.lovers;
            }
            if (article.readers) {
                curArticle.readers = article.readers;
            }
        }
        curArticle.save(function (err) {
            if (err) {
                reject(err);
            } else {
                resolve(curArticle);
            }
        });
    });
}
Sign up to request clarification or add additional context in comments.

17 Comments

Thank you! Super close. If I am seeing this correctly, articleList is coming back in the calling function as what appears to be an unresolved promise, and new articles are no longer being saved - I think because the promise is left in limbo.
@MikeFeltman - I missed the part about saveArticle() being async (I didn't see that). Do you need me to help with that too or can you apply these same principles to that function?
Let me give it a shot and if not, I'll be back. :) Thanks again! I'm not sure that's the only issue though because some of the articles are already in the table and they aren't in the array either. Maybe not, I guess there can't really be an array until all the promises are resolved.
@MikeFeltman - I redid my answer to incorporate saveArticle().
Thanks, that's pretty close to what I had. I'm getting an error when the promisified find is called. It's an unhandled rejection: TypeError: Invalid select() argument. Must be string or object, at Query.select. I traced it and it's in the Mongoose internals. It appears to be an issue with not specifying a projection, which is supposed to be optional.
|

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.