66 * @author Sean Burke <@seantomburke>
77 */
88
9- import { parseStringPromise } from 'xml2js ' ;
9+ import { XMLParser } from 'fast-xml-parser ' ;
1010import got from 'got' ;
1111import zlib from 'zlib' ;
1212import pLimit from 'p-limit' ;
@@ -95,6 +95,7 @@ export default class Sitemapper {
9595 errors : results . errors || [ ] ,
9696 } ;
9797 }
98+
9899 /**
99100 * Get the timeout
100101 *
@@ -174,7 +175,7 @@ export default class Sitemapper {
174175 }
175176
176177 /**
177- * Requests the URL and uses parseStringPromise to parse through and find the data
178+ * Requests the URL and uses fast-xml-parser to parse through and find the data
178179 *
179180 * @private
180181 * @param {string } [url] - the Sitemaps url (e.g. https://wp.seantburke.com/sitemap.xml)
@@ -218,8 +219,10 @@ export default class Sitemapper {
218219 responseBody = response . body ;
219220 }
220221
221- // otherwise parse the XML that was returned.
222- const data = await parseStringPromise ( responseBody ) ;
222+ // Parse XML using fast-xml-parser
223+ const parser = new XMLParser ( ) ;
224+
225+ const data = parser . parse ( responseBody . toString ( ) ) ;
223226
224227 // return the results
225228 return { error : null , data } ;
@@ -312,26 +315,32 @@ export default class Sitemapper {
312315 if ( this . debug ) {
313316 console . debug ( `Urlset found during "crawl('${ url } ')"` ) ;
314317 }
315- // filter out any urls that are older than the lastmod
316- const sites = data . urlset . url
318+
319+ // Convert single object to array if needed
320+ const urlArray = Array . isArray ( data . urlset . url )
321+ ? data . urlset . url
322+ : [ data . urlset . url ] ;
323+
324+ // Begin filtering the urls
325+ const sites = urlArray
317326 . filter ( ( site ) => {
318327 if ( this . lastmod === 0 ) return true ;
319328 if ( site . lastmod === undefined ) return false ;
320- const modified = new Date ( site . lastmod [ 0 ] ) . getTime ( ) ;
329+ const modified = new Date ( site . lastmod ) . getTime ( ) ;
321330
322331 return modified >= this . lastmod ;
323332 } )
324333 . filter ( ( site ) => {
325- return ! this . isExcluded ( site . loc [ 0 ] ) ;
334+ return ! this . isExcluded ( site . loc ) ;
326335 } )
327336 . map ( ( site ) => {
328337 if ( ! this . fields ) {
329- return site . loc && site . loc [ 0 ] ;
338+ return site . loc ;
330339 } else {
331340 let fields = { } ;
332341 for ( const [ field , active ] of Object . entries ( this . fields ) ) {
333342 if ( active && site [ field ] ) {
334- fields [ field ] = site [ field ] [ 0 ] ;
343+ fields [ field ] = site [ field ] ;
335344 }
336345 }
337346 return fields ;
@@ -349,7 +358,7 @@ export default class Sitemapper {
349358 }
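350359 // Map each child url into a promise to create an array of promises. NOTE(review): data.sitemapindex.sitemap is not normalized with Array.isArray here; a sitemap index with a single <sitemap> entry presumably parses to a plain object, so .map would throw - confirm and wrap in an array if needed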
351360 const sitemap = data . sitemapindex . sitemap
352- . map ( ( map ) => map . loc && map . loc [ 0 ] )
361+ . map ( ( map ) => map . loc )
353362 . filter ( ( url ) => {
354363 return ! this . isExcluded ( url ) ;
355364 } ) ;
@@ -441,8 +450,8 @@ export default class Sitemapper {
441450 * @param {Buffer } body - body of the gzipped file
442451 * @returns {Promise } promise wrapping the callback-based zlib.gunzip call on the body
443452 */
444- decompressResponseBody ( body ) {
445- return new Promise ( ( resolve , reject ) => {
453+ async decompressResponseBody ( body ) {
454+ return await new Promise ( ( resolve , reject ) => {
446455 const buffer = Buffer . from ( body ) ;
447456 zlib . gunzip ( buffer , ( err , result ) => {
448457 if ( err ) {
@@ -488,7 +497,7 @@ export default class Sitemapper {
488497 *
489498 * @typedef {Object } ParseData
490499 *
491- * @property {Error } error that either comes from `parseStringPromise` or `got` or custom error
500+ * @property {Error } error that either comes from fast-xml-parser or `got` or custom error
492501 * @property {Object } data
493502 * @property {string } data.url - URL of sitemap
494503 * @property {Array } data.urlset - Array of returned URLs
0 commit comments