$OpenBSD: patch-beetsplug_lyrics_py,v 1.1 2021/05/16 17:10:44 sthen Exp $

update lyrics.py wholesale to a newer version from git; fixes an
"AttributeError: 'NoneType' object has no attribute 'get_text'" which
occurs with the old version

https://github.com/beetbox/beets/commit/b239a0b3d2dcbde0495750903dc2e69067d48ebf.patch
https://marc.info/?l=openbsd-ports&m=162110190318247&w=2

Index: beetsplug/lyrics.py
--- beetsplug/lyrics.py.orig
+++ beetsplug/lyrics.py
@@ -55,6 +55,7 @@ except ImportError:
 
 from beets import plugins
 from beets import ui
+from beets import util
 import beets
 
 DIV_RE = re.compile(r'<(/?)div>?', re.I)
@@ -186,6 +187,9 @@ def search_pairs(item):
     In addition to the artist and title obtained from the `item` the
     method tries to strip extra information like paranthesized suffixes
     and featured artists from the strings and add them as candidates.
+    The artist sort name is added as a fallback candidate to help in
+    cases where artist name includes special characters or is in a
+    non-latin script.
     The method also tries to split multiple titles separated with `/`.
     """
     def generate_alternatives(string, patterns):
@@ -199,12 +203,16 @@ def search_pairs(item):
                 alternatives.append(match.group(1))
         return alternatives
 
-    title, artist = item.title, item.artist
+    title, artist, artist_sort = item.title, item.artist, item.artist_sort
 
     patterns = [
         # Remove any featuring artists from the artists name
         r"(.*?) {0}".format(plugins.feat_tokens())]
     artists = generate_alternatives(artist, patterns)
+    # Use the artist_sort as fallback only if it differs from artist to avoid
+    # repeated remote requests with the same search terms
+    if artist != artist_sort:
+        artists.append(artist_sort)
 
     patterns = [
         # Remove a parenthesized suffix from a title string. Common
@@ -351,14 +359,9 @@ class Genius(Backend):
             'User-Agent': USER_AGENT,
         }
 
-    def lyrics_from_song_api_path(self, song_api_path):
-        song_url = self.base_url + song_api_path
-        response = requests.get(song_url, headers=self.headers)
-        json = response.json()
-        path = json["response"]["song"]["path"]
-
+    def lyrics_from_song_page(self, page_url):
         # Gotta go regular html scraping... come on Genius.
-        page_url = "https://genius.com" + path
+        self._log.debug(u'fetching lyrics from: {0}', page_url)
         try:
             page = requests.get(page_url)
         except requests.RequestException as exc:
@@ -370,15 +373,38 @@ class Genius(Backend):
         # Remove script tags that they put in the middle of the lyrics.
         [h.extract() for h in html('script')]
 
-        # At least Genius is nice and has a tag called 'lyrics'!
-        # Updated css where the lyrics are based in HTML.
-        lyrics = html.find("div", class_="lyrics").get_text()
+        # Most of the time, the page contains a div with class="lyrics" where
+        # all of the lyrics can be found already correctly formatted
+        # Sometimes, though, it packages the lyrics into separate divs, most
+        # likely for easier ad placement
+        lyrics_div = html.find("div", class_="lyrics")
+        if not lyrics_div:
+            self._log.debug(u'Received unusual song page html')
+            verse_div = html.find("div",
+                                  class_=re.compile("Lyrics__Container"))
+            if not verse_div:
+                if html.find("div",
+                             class_=re.compile("LyricsPlaceholder__Message"),
+                             string="This song is an instrumental"):
+                    self._log.debug('Detected instrumental')
+                    return "[Instrumental]"
+                else:
+                    self._log.debug("Couldn't scrape page using known layouts")
+                    return None
 
-        return lyrics
+            lyrics_div = verse_div.parent
+            for br in lyrics_div.find_all("br"):
+                br.replace_with("\n")
+            ads = lyrics_div.find_all("div",
+                                      class_=re.compile("InreadAd__Container"))
+            for ad in ads:
+                ad.replace_with("\n")
 
+        return lyrics_div.get_text()
+
     def fetch(self, artist, title):
         search_url = self.base_url + "/search"
-        data = {'q': title}
+        data = {'q': title + " " + artist.lower()}
         try:
             response = requests.get(search_url, data=data,
                                     headers=self.headers)
@@ -392,21 +418,25 @@ class Genius(Backend):
             self._log.debug(u'Genius API request returned invalid JSON')
             return None
 
-        song_info = None
         for hit in json["response"]["hits"]:
-            if hit["result"]["primary_artist"]["name"] == artist:
-                song_info = hit
-                break
+            # Genius uses zero-width characters to denote lowercase
+            # artist names.
+            hit_artist = hit["result"]["primary_artist"]["name"]. \
+                strip(u'\u200b').lower()
 
-        if song_info:
-            song_api_path = song_info["result"]["api_path"]
-            return self.lyrics_from_song_api_path(song_api_path)
+            if hit_artist == artist.lower():
+                return self.lyrics_from_song_page(hit["result"]["url"])
 
+        self._log.debug(u'genius: no matching artist')
 
+
 class LyricsWiki(SymbolsReplaced):
     """Fetch lyrics from LyricsWiki."""
 
-    URL_PATTERN = 'http://lyrics.wikia.com/%s:%s'
+    if util.SNI_SUPPORTED:
+        URL_PATTERN = 'https://lyrics.wikia.com/%s:%s'
+    else:
+        URL_PATTERN = 'http://lyrics.wikia.com/%s:%s'
 
     def fetch(self, artist, title):
         url = self.build_url(artist, title)
@@ -740,7 +770,8 @@ class LyricsPlugin(plugins.BeetsPlugin):
             write = ui.should_write()
             if opts.writerest:
                 self.writerest_indexes(opts.writerest)
-            for item in lib.items(ui.decargs(args)):
+            items = lib.items(ui.decargs(args))
+            for item in items:
                 if not opts.local_only and not self.config['local']:
                     self.fetch_item_lyrics(
                         lib, item, write,
@@ -750,10 +781,10 @@ class LyricsPlugin(plugins.BeetsPlugin):
                     if opts.printlyr:
                         ui.print_(item.lyrics)
                     if opts.writerest:
-                        self.writerest(opts.writerest, item)
-            if opts.writerest:
-                # flush last artist
-                self.writerest(opts.writerest, None)
+                        self.appendrest(opts.writerest, item)
+            if opts.writerest and len(items) > 0:
+                # flush last artist & write to ReST
+                self.writerest(opts.writerest)
                 ui.print_(u'ReST files generated. to build, use one of:')
                 ui.print_(u'  sphinx-build -b html %s _build/html'
                           % opts.writerest)
@@ -765,26 +796,21 @@ class LyricsPlugin(plugins.BeetsPlugin):
         cmd.func = func
         return [cmd]
 
-    def writerest(self, directory, item):
-        """Write the item to an ReST file
+    def appendrest(self, directory, item):
+        """Append the item to an ReST file
 
         This will keep state (in the `rest` variable) in order to avoid
         writing continuously to the same files.
         """
 
-        if item is None or slug(self.artist) != slug(item.albumartist):
-            if self.rest is not None:
-                path = os.path.join(directory, 'artists',
-                                    slug(self.artist) + u'.rst')
-                with open(path, 'wb') as output:
-                    output.write(self.rest.encode('utf-8'))
-                self.rest = None
-                if item is None:
-                    return
+        if slug(self.artist) != slug(item.albumartist):
+            # Write current file and start a new one ~ item.albumartist
+            self.writerest(directory)
             self.artist = item.albumartist.strip()
             self.rest = u"%s\n%s\n\n.. contents::\n   :local:\n\n" \
                         % (self.artist,
                            u'=' * len(self.artist))
+
         if self.album != item.album:
             tmpalbum = self.album = item.album.strip()
             if self.album == '':
@@ -795,6 +821,16 @@ class LyricsPlugin(plugins.BeetsPlugin):
         self.rest += u"%s\n%s\n\n%s\n\n" % (title_str,
                                             u'~' * len(title_str),
                                             block)
+
+    def writerest(self, directory):
+        """Write self.rest to a ReST file
+        """
+        if self.rest is not None and self.artist is not None:
+            path = os.path.join(directory, 'artists',
+                                slug(self.artist) + u'.rst')
+            with open(path, 'wb') as output:
+                output.write(self.rest.encode('utf-8'))
+
 
     def writerest_indexes(self, directory):
         """Write conf.py and index.rst files necessary for Sphinx
