Parse XML objects in AsyncTask

Besides JSON, data is often stored in XML files. XML files are very similar to JSON files, but require a slightly different parsing method, which is accomplished through a third “State” class.

Here is the sample XML file we will be parsing. You can see that the main objects are called “item”, which contain the individual data: <canonical>, which is the story url, <title>, which is the story title, and <pubDate>, which is the date the story was published.


<rss version="2.0">

<channel>

    <item>

    <canonical>

http://www.tallahassee.com/article/20111207/FSVIEW5/111207007/Health-concerns-close-popular-Tennessee-Strip-venues

    </canonical>

    <title>

    Health concerns close popular Tennessee Strip venues

    </title>

    <pubDate>Wed, 7 Dec 2011 00:00:00 GMT</pubDate>

    </item>

    <item>

    <canonical>

http://www.tallahassee.com/article/20111207/FSVIEW0105/111207006/Amy-Winehouse-s-Lioness-Hidden-Treasures

    </canonical>

    <title>Amy Winehouse's Lioness: Hidden Treasures</title>

    <pubDate>Wed, 7 Dec 2011 00:00:00 GMT</pubDate>

    </item>

    <item>

    <canonical>

http://www.tallahassee.com/article/20111205/FSVIEW0201/111205018/Ponderwatch-Tebow-Takeover

    </canonical>

    <title>Ponderwatch: Tebow Takeover</title>

    <pubDate>Mon, 5 Dec 2011 00:00:00 GMT</pubDate>

    </item>

</channel>

</rss>

There are several ways to parse XML, but we will be using SAXParser, arguably the most efficient one. As with parsing JSON files, we begin by making a class that extends AsyncTask. Within the class, initialize a progress dialog to display while the parsing occurs. To display the dialog, override onPreExecute(), set the dialog message, and call show().


/**
 * AsyncTask that takes String parameters (the feed URL is read from the first
 * one in doInBackground) and produces a List of ParsedNewsItem as its result.
 */
public class mLoadFeed extends AsyncTask<String, String, List<ParsedNewsItem>>{

    // Progress dialog created against the enclosing FSUNewsReaderFeedParserActivity instance.
    private final ProgressDialog mDialog = new ProgressDialog(FSUNewsReaderFeedParserActivity.this);

    // Parsed feed items; starts out as an empty list.
    private List<ParsedNewsItem> mFeedItems = new ArrayList<ParsedNewsItem>();

    /**
     * Prepares and shows the progress dialog before the background work starts.
     */
    @Override
    protected void onPreExecute() {
        super.onPreExecute();

        // Message displayed in the dialog while the feed is being fetched.
        final String progressMessage = "Fetching Stories...";
        mDialog.setMessage(progressMessage);
        mDialog.show();
    }

Next, we want to perform the actual retrieval and parsing. To do this, within doInBackground(), set the url to where the XML file is, initialize the SaxParser with the NewsItemHandler(), and perform the SAXParser function: parse(). This function begins the parsing process in the NewsItemHandler class.


/**
 * Downloads the feed at {@code feedUrl[0]} and parses it with a SAX parser
 * driven by a {@link NewsItemHandler}.
 *
 * @param feedUrl the feed location; only the first element is used
 * @return the parsed items, or the list held in {@code mFeedItems} before the
 *         call if the download or the parse failed
 */
@Override
protected List<ParsedNewsItem> doInBackground(String... feedUrl) {
    // Kept outside the try block so the stream can be closed in finally.
    java.io.InputStream feedStream = null;

    try {
        URL searchUrl = new URL(feedUrl[0]);

        SAXParserFactory spf = SAXParserFactory.newInstance();
        SAXParser saxParser = spf.newSAXParser();
        XMLReader xmlReader = saxParser.getXMLReader();

        NewsItemHandler myExampleHandler = new NewsItemHandler();
        xmlReader.setContentHandler(myExampleHandler);

        feedStream = searchUrl.openStream();
        InputSource parseData = new InputSource(feedStream);

        /* parse() function begins the parsing in the NewsItemHandler class */
        xmlReader.parse(parseData);

        // Store the result in the declared field so the log line and the
        // return value both refer to the same list.
        mFeedItems = myExampleHandler.getParsedData();

        Log.i(TAG, "num of items: " + mFeedItems.size());
    } catch (Exception e) {
        // Pass the exception itself so the stack trace reaches the log.
        Log.e("borked", "feed has borked " + e.toString(), e);
    } finally {
        if (feedStream != null) {
            try {
                feedStream.close();
            } catch (java.io.IOException ignored) {
                // Nothing useful can be done if closing the stream fails.
            }
        }
    }

    return mFeedItems;
}

The NewsItemHandler class extends the generic DefaultHandler class. Therefore, we must override the following functions in order to parse out our information, as well as have a function which returns the results.


/**
 * SAX handler that builds one {@link ParsedNewsItem} per {@code <item>}
 * element of the feed.
 *
 * <p>An item is created when {@code <item>} opens, filled in as its
 * {@code <canonical>}, {@code <title>} and {@code <pubDate>} children close,
 * and appended to the result list when {@code </item>} is reached. Elements
 * that map to no state (such as {@code <rss>} and {@code <channel>}) are
 * ignored.
 */
public class NewsItemHandler extends DefaultHandler {

    /** Completed items in document order. Per instance, so separate parses do not share results. */
    private final List<ParsedNewsItem> mListNewsItems = new ArrayList<ParsedNewsItem>();

    /** Item currently being filled in; {@code null} while outside an {@code <item>} element. */
    private ParsedNewsItem mParsedNewsItem;

    /** State of the element that is currently open, or {@code null} if its text is not needed. */
    private XmlState mCurrentState;

    /** Text gathered for the open element; the parser may deliver it in several chunks. */
    private final StringBuilder mText = new StringBuilder();

    /**
     * @return the items parsed so far, in document order (never {@code null})
     */
    public List<ParsedNewsItem> getParsedData() {
        return mListNewsItems;
    }

    /** Resets the per-document parsing state; results already collected are kept. */
    @Override
    public void startDocument() throws SAXException {
        mParsedNewsItem = null;
        mCurrentState = null;
        mText.setLength(0);
    }

    @Override
    public void endDocument() throws SAXException {
        // Nothing to do
    }

    /**
     * Records the state for the opening tag and starts a new item when the
     * tag is {@code <item>}.
     */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        // Discard whitespace collected between the previous tag and this one.
        mText.setLength(0);
        mCurrentState = StateParser.setState(elementName(localName, qName));

        // Only <item> starts a new object; the field tags just select where
        // the upcoming text will be stored.
        if (mCurrentState == XmlState.ITEM) {
            mParsedNewsItem = new ParsedNewsItem();
        }
    }

    /**
     * Stores the text gathered for a field element into the current item, and
     * appends the item to the result list when {@code </item>} is reached.
     */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        // Resolve the state from the closing tag itself rather than from the
        // most recent opening tag.
        XmlState closingState = StateParser.setState(elementName(localName, qName));
        String value = mText.toString().trim();

        mText.setLength(0);
        // Text between this closing tag and the next opening tag is ignored.
        mCurrentState = null;

        if (closingState == null || mParsedNewsItem == null) {
            // Unknown tag, or a known tag that appears outside of an <item>.
            return;
        }

        switch (closingState) {
        case STORYURL:
            try {
                mParsedNewsItem.setStoryUrl(value);
            } catch (MalformedURLException e) {
                // Best effort: the item is kept and its URL stays unset.
                e.printStackTrace();
            }
            break;

        case TITLE:
            mParsedNewsItem.setStoryTitle(value);
            break;

        case PUBDATE:
            mParsedNewsItem.setPublishDate(value);
            break;

        case ITEM:
            mListNewsItems.add(mParsedNewsItem);
            mParsedNewsItem = null;
            break;
        }
    }

    /**
     * Buffers the text of the open field element. The value is committed in
     * {@link #endElement} because the text of one element may arrive in more
     * than one call.
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        if (mParsedNewsItem == null || mCurrentState == null
                || mCurrentState == XmlState.ITEM) {
            return;
        }
        mText.append(ch, start, length);
    }

    /** Returns the local name, falling back to the qualified name when the local name is empty. */
    private static String elementName(String localName, String qName) {
        if (localName == null || localName.length() == 0) {
            return qName;
        }
        return localName;
    }

}

Now as we’ve seen, there are two classes needed to perform the parsing, the StateParser class, to determine the current state, and the ParsedNewsItem class, to store the parsed data.

The StateParser class simply cross references the current tag with the items in the XmlState enum, setting the XmlState accordingly.


/**
 * Maps feed tag names to {@link XmlState} values and remembers the most
 * recently selected state in a single static slot.
 */
public class StateParser {

    /** The feed elements of interest, each paired with its XML tag name. */
    public enum XmlState {

        ITEM("item"), TITLE("title"), STORYURL("canonical"), PUBDATE("pubDate");

        /** Tag name that selects this state. */
        private final String mTag;

        XmlState(String tag) {
            mTag = tag;
        }

    }

    /** State selected by the last call to {@link #setState}; {@code null} if the tag was unknown. */
    private static XmlState mState;

    /**
     * Selects the state whose tag equals {@code localName} and remembers it.
     *
     * @param localName the tag name to look up; matching is case-sensitive
     * @return the matching state, or {@code null} when no state uses that tag
     */
    public static XmlState setState(String localName) {
        XmlState matched = null;

        for (XmlState candidate : XmlState.values()) {
            // Calling equals() on the constant keeps a null tag name harmless.
            if (candidate.mTag.equals(localName)) {
                matched = candidate;
                break;
            }
        }

        mState = matched;
        return matched;
    }

    /**
     * @return the state stored by the most recent {@link #setState} call
     */
    public static XmlState getState() {
        return mState;
    }

}

The ParsedNewsItem class simply stores all the needed data into one object, ParsedNewsItem.


/**
 * Plain holder for one story from the feed: its URL, its title and its
 * publish date. All three values are {@code null} until set.
 */
public class ParsedNewsItem {

    private URL mStoryUrl;
    private String mStoryTitle;
    private String mPublishDate;

    /**
     * Parses {@code storyUrl} and stores the result.
     *
     * @param storyUrl textual form of the story URL
     * @throws MalformedURLException if the text cannot be turned into a URL
     */
    public void setStoryUrl(String storyUrl) throws MalformedURLException {
        final URL parsedUrl = new URL(storyUrl);
        mStoryUrl = parsedUrl;
    }

    /** @return the story URL, or {@code null} if none has been set */
    public URL getStoryUrl() {
        return mStoryUrl;
    }

    /** @param storyTitle the story title to store */
    public void setStoryTitle(String storyTitle) {
        mStoryTitle = storyTitle;
    }

    /** @return the story title, or {@code null} if none has been set */
    public String getStoryTitle() {
        return mStoryTitle;
    }

    /** @param publishDate the publish date text to store, kept as given */
    public void setPublishDate(String publishDate) {
        mPublishDate = publishDate;
    }

    /** @return the publish date text, or {@code null} if none has been set */
    public String getPublishDate() {
        return mPublishDate;
    }

    /**
     * Creates a new item carrying the same URL, title and publish date as
     * this one.
     *
     * @return a separate {@code ParsedNewsItem} with the same field values
     */
    public ParsedNewsItem copy() {
        final ParsedNewsItem duplicate = new ParsedNewsItem();
        duplicate.mPublishDate = this.mPublishDate;
        duplicate.mStoryTitle = this.mStoryTitle;
        duplicate.mStoryUrl = this.mStoryUrl;
        return duplicate;
    }

}

Finally, after parsing the XML file and storing the results in the List<ParsedNewsItem>, we retrieve the List with myExampleHandler.getParsedData() and return it from doInBackground(). Once the background work has finished, the dialog must be dismissed in the overridden function onPostExecute().


/**
 * Dismisses the progress dialog once the background work has produced its
 * result.
 *
 * @param posts the list returned by doInBackground(); not used here
 */
@Override
protected void onPostExecute(List<ParsedNewsItem> posts) {
    // The parameter type must match the task's result type for this to
    // override AsyncTask.onPostExecute; the dialog field is named mDialog.
    mDialog.dismiss();
}

In order to call the AsyncTask we use:


// Starts the task and then asks it for the parsed list via get().
// NOTE(review): AsyncTask.get() waits until the task has finished, so calling
// it on the UI thread would block that thread while the feed loads — confirm
// this is intended, or consume the result in onPostExecute() instead.
List<ParsedNewsItem> itemList = new mLoadFeed().execute(/* URL */).get();