485 lines
19 KiB
Text
Executable file
485 lines
19 KiB
Text
Executable file
#! /usr/bin/env nix-shell
|
|
#! nix-shell -i python3 -p python3
|
|
|
|
# subdl - command-line tool to download subtitles from opensubtitles.org.
|
|
#
|
|
# Uses code from subdownloader (a GUI app).
|
|
|
|
__doc__ = '''\
|
|
Syntax: subdl [options] moviefile.avi ...
|
|
|
|
Subdl is a command-line tool for downloading subtitles from opensubtitles.org.
|
|
|
|
By default, it will search for English subtitles, display the results,
|
|
download the highest-rated result in the requested language and save it to the
|
|
appropriate filename.
|
|
|
|
Options:
|
|
--help This text
|
|
--version Print version and exit
|
|
--lang=LANGUAGES Comma-separated list of languages in 3-letter code, e.g.
|
|
'eng,spa,fre', or 'all' for all. Default is 'eng'.
|
|
--list-languages List available languages and exit.
|
|
--download=ID Download a particular subtitle by numeric ID.
|
|
--download=first Download the first search result [default].
|
|
--download=all Download all search results.
|
|
--download=best-rating Download the result with best rating.
|
|
--download=most-downloaded Download the most downloaded result.
|
|
--download=query Query which search result to download.
|
|
--download=none, -n Display search results and exit.
|
|
--output=OUTPUT Output to specified output filename. Can include the
|
|
following format specifiers:
|
|
%I subtitle id
|
|
%m movie file base %M movie file extension
|
|
%s subtitle file base %S subtitle file extension
|
|
%l language (English) %L language (2-letter ISO639)
|
|
Default is "%m.%S"; if multiple languages are searched,
|
|
then the default is "%m.%L.%S"; if --download=all, then
|
|
the default is "%m.%L.%I.%S".
|
|
--existing=abort Abort if output filename already exists [default].
|
|
--existing=bypass Exit gracefully if output filename already exists.
|
|
--existing=overwrite Overwrite if output filename already exists.
|
|
--existing=query Query whether to overwrite.
|
|
--imdb-id=id Query by IMDB id. Hash is tried first unless --force-imdb
|
|
is used. IMDB URLs are also accepted.
|
|
--force-imdb Force IMDB search. --imdb-id must be specified.
|
|
--force-filename Force search using filename.
|
|
--filter Filter blacklisted texts from subtitle.
|
|
--interactive, -i Equivalent to --download=query --existing=query.
|
|
'''
|
|
|
|
NAME = 'subdl'
|
|
VERSION = '1.0.3'
|
|
|
|
VERSION_INFO = '''\
|
|
|
|
This is free software; see the source for copying conditions. There is NO
|
|
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
http://code.google.com/p/subdl/'''
|
|
|
|
import os, sys
|
|
import struct
|
|
import xmlrpc.client
|
|
import io, gzip, base64
|
|
import getopt
|
|
import re
|
|
|
|
osdb_server = "https://api.opensubtitles.org/xml-rpc"
|
|
xmlrpc_server = xmlrpc.client.ServerProxy(osdb_server)
|
|
login = xmlrpc_server.LogIn("", "", "en", NAME+" "+VERSION)
|
|
osdb_token = login["token"]
|
|
|
|
BLACKLIST = [
|
|
'opensubtitles',
|
|
'addic7ed',
|
|
'joycasino',
|
|
'bitninja\.io',
|
|
'Please rate this subtitle at www\.osdb\.link',
|
|
'allsubs',
|
|
'firebit\.org',
|
|
'humanguardians\.com',
|
|
'subtitles by',
|
|
'recast\.ai',
|
|
'by mstoll',
|
|
'subs corrected',
|
|
'by tronar',
|
|
'titlovi',
|
|
'^_$',
|
|
'^- _$',
|
|
]
|
|
|
|
class Options: pass
|
|
options = Options()
|
|
options.lang = 'eng'
|
|
options.download = 'first'
|
|
options.output = None
|
|
options.existing = 'abort'
|
|
options.imdb_id = None
|
|
options.force_imdb = False
|
|
options.force_filename = False
|
|
options.filter = False
|
|
|
|
class SubtitleSearchResult:
|
|
def __init__(self, dict):
|
|
self.__dict__ = dict
|
|
|
|
def file_ext(filename):
|
|
return os.path.splitext(filename)[1][1:]
|
|
|
|
def file_base(filename):
|
|
return os.path.splitext(filename)[0]
|
|
|
|
def gunzipstr(zs):
|
|
with gzip.open(io.BytesIO(zs)) as gz:
|
|
return gz.read()
|
|
|
|
def writefile(filename, str):
|
|
try:
|
|
with open(filename, 'wb') as f:
|
|
f.write(str)
|
|
except Exception as e:
|
|
raise SystemExit("Error writing to %s: %s" % (filename, e))
|
|
|
|
def query_num(s, low, high):
|
|
while True:
|
|
try:
|
|
n = input("%s [%d..%d] " % (s, low, high))
|
|
except KeyboardInterrupt:
|
|
raise SystemExit("Aborted")
|
|
try:
|
|
n = int(n)
|
|
if low <= n <= high:
|
|
return n
|
|
except:
|
|
pass
|
|
|
|
def query_yn(s):
|
|
while True:
|
|
try:
|
|
r = input("%s [y/n] " % s).lower()
|
|
except KeyboardInterrupt:
|
|
raise SystemExit("Aborted")
|
|
if r.startswith('y'):
|
|
return True
|
|
elif r.startswith('n'):
|
|
return False
|
|
|
|
def filtersub(s):
|
|
s = s.strip()
|
|
line_sep = b'\r\n' if re.search(b'\r\n', s) else b'\n'
|
|
subs = re.split(b'(?:\r?\n){2,}', s)
|
|
subs = [re.split(b'\r?\n', sub, 2) for sub in subs]
|
|
filter_pattern = re.compile('|'.join(BLACKLIST).encode(), re.M | re.I)
|
|
for i in range(len(subs) - 1, -1, -1):
|
|
if len(subs[i]) < 3:
|
|
del subs[i]
|
|
continue
|
|
text = subs[i][2]
|
|
if filter_pattern.search(text):
|
|
print("Removed", i + 1, ":", text)
|
|
del subs[i]
|
|
for i in range(len(subs)):
|
|
subs[i][0] = str(i + 1).encode()
|
|
subs = map(line_sep.join, subs)
|
|
return (line_sep * 2).join(subs)
|
|
|
|
def movie_hash(name):
|
|
longlongformat = '<Q'
|
|
bytesize = struct.calcsize(longlongformat)
|
|
assert bytesize == 8
|
|
filesize = os.path.getsize(name)
|
|
hash = filesize
|
|
if filesize < 65536 * 2:
|
|
raise Exception("Error hashing %s: file too small" % (name))
|
|
with open(name, "rb") as f:
|
|
for x in range(int(65536/bytesize)):
|
|
hash += struct.unpack(longlongformat, f.read(bytesize))[0]
|
|
hash &= 0xFFFFFFFFFFFFFFFF
|
|
f.seek(filesize-65536, 0)
|
|
for x in range(int(65536/bytesize)):
|
|
hash += struct.unpack(longlongformat, f.read(bytesize))[0]
|
|
hash &= 0xFFFFFFFFFFFFFFFF
|
|
return "%016x" % hash
|
|
|
|
def SearchSubtitlesByHash(filename, langs_search):
|
|
moviehash = movie_hash(filename)
|
|
moviebytesize = os.path.getsize(filename)
|
|
searchlist = [({'sublanguageid': langs_search,
|
|
'moviehash': moviehash,
|
|
'moviebytesize': str(moviebytesize)})]
|
|
print("Searching for subtitles for moviehash=%s..." % (moviehash), file=sys.stderr)
|
|
try:
|
|
results = xmlrpc_server.SearchSubtitles(osdb_token, searchlist)
|
|
except Exception as e:
|
|
raise SystemExit("Error in XMLRPC SearchSubtitles call: %s" % e)
|
|
data = results['data']
|
|
return data and [SubtitleSearchResult(d) for d in data]
|
|
|
|
def SearchSubtitlesByIMDBId(filename, langs_search, imdb_id):
|
|
result = re.search("\d+", imdb_id)
|
|
imdb_id = result.group(0)
|
|
searchlist = [({'sublanguageid': langs_search,
|
|
'imdbid': imdb_id})]
|
|
print("Searching for subtitles for IMDB id=%s..." % (imdb_id), file=sys.stderr)
|
|
try:
|
|
results = xmlrpc_server.SearchSubtitles(osdb_token, searchlist)
|
|
except Exception as e:
|
|
raise SystemExit("Error in XMLRPC SearchSubtitles call: %s" % e)
|
|
data = results['data']
|
|
return data and [SubtitleSearchResult(d) for d in data]
|
|
|
|
def SearchSubtitlesByFileName(filename, langs_search):
|
|
file_name = file_base(os.path.basename(filename))
|
|
searchlist = [({'sublanguageid': langs_search,
|
|
'query': file_name})]
|
|
print("Searching for subtitles for query=%s..." % (file_name), file=sys.stderr)
|
|
try:
|
|
results = xmlrpc_server.SearchSubtitles(osdb_token, searchlist)
|
|
except Exception as e:
|
|
raise SystemExit("Error in XMLRPC SearchSubtitles call: %s" % e)
|
|
data = results['data']
|
|
return data and [SubtitleSearchResult(d) for d in data]
|
|
|
|
def format_movie_name(s):
|
|
if s.startswith('"') and s.endswith('"'):
|
|
s = s[1:-1]
|
|
s = s.replace('"', "'")
|
|
return '"%s"' % s
|
|
|
|
def DisplaySubtitleSearchResults(search_results):
|
|
print("Found %d results:" % (len(search_results)))
|
|
idsubtitle_maxlen = 0
|
|
moviename_maxlen = 0
|
|
downloads_maxlen = 0
|
|
for subtitle in search_results:
|
|
idsubtitle = subtitle.IDSubtitleFile
|
|
idsubtitle_maxlen = max(idsubtitle_maxlen, len(idsubtitle))
|
|
moviename = format_movie_name(subtitle.MovieName)
|
|
moviename_maxlen = max(moviename_maxlen, len(moviename))
|
|
downloads = subtitle.SubDownloadsCnt
|
|
downloads_maxlen = max(downloads_maxlen, len(downloads))
|
|
|
|
n = 0
|
|
count_maxlen = len(repr(len(search_results)))
|
|
for subtitle in search_results:
|
|
n += 1
|
|
idsubtitle = subtitle.IDSubtitleFile
|
|
lang = subtitle.ISO639
|
|
# langn = subtitle.LanguageName
|
|
# str_uploader = subtitle.UserNickName or "Anonymous"
|
|
moviename = format_movie_name(subtitle.MovieName)
|
|
filename = subtitle.SubFileName
|
|
rating = subtitle.SubRating
|
|
downloads = subtitle.SubDownloadsCnt
|
|
# idmovie = subtitle.IDMovie
|
|
# idmovieimdb = subtitle.IDMovieImdb
|
|
if options.download == 'query':
|
|
print("%s." % repr(n).rjust(count_maxlen), end=" ")
|
|
print("#%s [%s] [Rat:%s DL:%s] %s %s " % (idsubtitle.rjust(idsubtitle_maxlen),
|
|
lang,
|
|
rating.rjust(4),
|
|
downloads.rjust(downloads_maxlen),
|
|
moviename.ljust(moviename_maxlen),
|
|
filename))
|
|
|
|
def DisplaySelectedSubtitle(selected_file):
|
|
print("#{0.IDSubtitleFile} {0.SubFileName}".format(selected_file))
|
|
|
|
def DownloadSubtitle(sub_id):
|
|
'''Download subtitle #sub_id and return subtitle text as string.'''
|
|
try:
|
|
answer = xmlrpc_server.DownloadSubtitles(osdb_token, [sub_id])
|
|
subtitle_compressed = answer['data'][0]['data']
|
|
except Exception as e:
|
|
raise SystemExit("Error in XMLRPC DownloadSubtitles call: %s" % e)
|
|
return gunzipstr(base64.b64decode(subtitle_compressed))
|
|
|
|
def DownloadAndSaveSubtitle(sub_id, destfilename):
|
|
if os.path.exists(destfilename):
|
|
if options.existing == 'abort':
|
|
print("Subtitle %s already exists; aborting (try --interactive)." % destfilename)
|
|
raise SystemExit(3)
|
|
elif options.existing == 'bypass':
|
|
print("Subtitle %s already exists; bypassing." % destfilename)
|
|
return
|
|
elif options.existing == 'overwrite':
|
|
print("Subtitle %s already exists; overwriting." % destfilename)
|
|
elif options.existing == 'query':
|
|
if query_yn("Subtitle %s already exists. Overwrite?" % destfilename):
|
|
pass
|
|
else:
|
|
raise SystemExit("File not overwritten.")
|
|
else:
|
|
raise Exception("internal error: bad option.existing=%s" % options.existing)
|
|
print("Downloading #%s to %s..." % (sub_id, destfilename), file=sys.stderr, end=" ")
|
|
s = DownloadSubtitle(sub_id)
|
|
if options.filter:
|
|
s = filtersub(s)
|
|
writefile(destfilename, s)
|
|
print("done, wrote %d bytes."% (len(s)), file=sys.stderr)
|
|
|
|
def format_subtitle_output_filename(videoname, search_result):
|
|
subname = search_result.SubFileName
|
|
repl = {
|
|
'I': search_result.IDSubtitleFile,
|
|
'm': file_base(videoname), 'M': file_ext(videoname),
|
|
's': file_base(subname), 'S': file_ext(subname),
|
|
'l': search_result.LanguageName,
|
|
'L': search_result.ISO639
|
|
}
|
|
output_filename = options.output.format(**repl)
|
|
assert output_filename != videoname
|
|
return output_filename
|
|
|
|
def AutoDownloadAndSave(videoname, search_result, downloaded=None):
|
|
output_filename = format_subtitle_output_filename(videoname, search_result)
|
|
if downloaded is not None:
|
|
if output_filename in downloaded:
|
|
raise SystemExit("Already wrote to %s! Uniquify output filename format." % output_filename)
|
|
downloaded[output_filename] = 1
|
|
DownloadAndSaveSubtitle(search_result.IDSubtitleFile, output_filename)
|
|
|
|
def select_search_result_by_id(id, search_results):
|
|
for search_result in search_results:
|
|
if search_result.IDSubtitleFile == id:
|
|
return search_result
|
|
raise SystemExit("Search results did not contain subtitle with id %s" % id)
|
|
|
|
def help():
|
|
print(__doc__)
|
|
raise SystemExit
|
|
|
|
def isnumber(value):
|
|
try:
|
|
return int(value) > 0
|
|
except:
|
|
return False
|
|
|
|
def ListLanguages():
|
|
languages = xmlrpc_server.GetSubLanguages('')['data']
|
|
for language in languages:
|
|
print(language['SubLanguageID'], language['ISO639'], language['LanguageName'])
|
|
raise SystemExit
|
|
|
|
def default_output_fmt():
|
|
if options.download == 'all':
|
|
return "{m}.{L}.{I}.{S}"
|
|
elif options.lang == 'all' or ',' in options.lang:
|
|
return "{m}.{L}.{S}"
|
|
else:
|
|
return "{m}.{S}"
|
|
|
|
|
|
def parseargs(args):
|
|
try:
|
|
opts, arguments = getopt.getopt(args, 'h?in', [
|
|
'existing=', 'lang=', 'search-only=',
|
|
'download=', 'output=', 'interactive',
|
|
'list-languages', 'imdb-id=', 'force-imdb',
|
|
'force-filename', 'filter', 'help',
|
|
'version', 'versionx'])
|
|
except getopt.GetoptError as e:
|
|
raise SystemExit("%s: %s (see --help)" % (sys.argv[0], e))
|
|
for option, value in opts:
|
|
if option == '--help' or option == '-h' or option == '-?':
|
|
help()
|
|
elif option == '--versionx':
|
|
print(VERSION)
|
|
raise SystemExit
|
|
elif option == '--version':
|
|
print("%s %s" % (NAME, VERSION))
|
|
raise SystemExit
|
|
elif option == '--existing':
|
|
if value in ['abort', 'overwrite', 'bypass', 'query']:
|
|
pass
|
|
else:
|
|
raise SystemExit("Argument to --existing must be one of: abort, overwrite, bypass, query")
|
|
options.existing = value
|
|
elif option == '--lang':
|
|
options.lang = value
|
|
elif option == '--download':
|
|
if value in ['all', 'first', 'query', 'none', 'best-rating', 'most-downloaded'] or isnumber(value):
|
|
pass
|
|
else:
|
|
raise SystemExit("Argument to --download must be numeric subtitle id or one: all, first, query, none")
|
|
options.download = value
|
|
elif option == '-n':
|
|
options.download = 'none'
|
|
elif option == '--output':
|
|
options.output = value
|
|
elif option == '--imdb-id':
|
|
options.imdb_id = value
|
|
elif option == '--force-imdb':
|
|
options.force_imdb = True
|
|
elif option == '--force-filename':
|
|
options.force_filename = True
|
|
elif option == '--filter':
|
|
options.filter = True
|
|
elif option == '--interactive' or option == '-i':
|
|
options.download = 'query'
|
|
options.existing = 'query'
|
|
elif option == '--list-languages':
|
|
ListLanguages()
|
|
else:
|
|
raise SystemExit("internal error: bad option '%s'" % option)
|
|
if not options.output:
|
|
options.output = default_output_fmt()
|
|
if len(arguments) == 0:
|
|
raise SystemExit("syntax: %s [options] filename.avi (see --help)" % (sys.argv[0]))
|
|
if len(arguments) > 1 and options.force_imdb:
|
|
raise SystemExit("Can't use --force-imdb with multiple files.")
|
|
if len(arguments) > 1 and isnumber(options.download):
|
|
raise SystemExit("Can't use --download=ID with multiple files.")
|
|
|
|
return arguments
|
|
|
|
|
|
def main(args):
|
|
files = parseargs(args)
|
|
no_search_results = 0
|
|
for file in files:
|
|
selected_file = '';
|
|
if not os.path.exists(file):
|
|
raise SystemExit("can't find file '%s'" % file)
|
|
if options.force_imdb:
|
|
if options.imdb_id is None:
|
|
raise SystemExit("With --force-imdb a --imdb-id must be provided.")
|
|
search_results = SearchSubtitlesByIMDBId(file, options.lang, options.imdb_id)
|
|
elif options.force_filename:
|
|
search_results = SearchSubtitlesByFileName(file, options.lang)
|
|
else:
|
|
search_results = SearchSubtitlesByHash(file, options.lang)
|
|
if not search_results and options.imdb_id is not None:
|
|
print("No results found by hash, trying IMDB id")
|
|
search_results = SearchSubtitlesByIMDBId(file, options.lang, options.imdb_id)
|
|
elif not search_results:
|
|
print("No results found by hash, trying filename")
|
|
search_results = SearchSubtitlesByFileName(file, options.lang)
|
|
if not search_results:
|
|
print("No results found.", file=sys.stderr)
|
|
no_search_results = no_search_results + 1
|
|
continue
|
|
|
|
DisplaySubtitleSearchResults(search_results)
|
|
if options.download == 'none':
|
|
raise SystemExit
|
|
elif options.download == 'first':
|
|
selected_file = search_results[0]
|
|
print()
|
|
print("Defaulting to first result (try --interactive):")
|
|
DisplaySelectedSubtitle(selected_file)
|
|
print()
|
|
AutoDownloadAndSave(file, search_results[0])
|
|
elif options.download == 'all':
|
|
downloaded = {}
|
|
for search_result in search_results:
|
|
AutoDownloadAndSave(file, search_result, downloaded)
|
|
elif options.download == 'query':
|
|
n = query_num("Enter result to download:",
|
|
1, len(search_results))
|
|
AutoDownloadAndSave(file, search_results[n-1])
|
|
elif options.download == 'best-rating':
|
|
selected_file = max(search_results, key=lambda sub: float(sub.SubRating))
|
|
print()
|
|
print("Downloading subtitle with best rating:")
|
|
DisplaySelectedSubtitle(selected_file)
|
|
print()
|
|
AutoDownloadAndSave(file, selected_file)
|
|
elif options.download == 'most-downloaded':
|
|
selected_file = max(search_results, key=lambda sub: int(sub.SubDownloadsCnt))
|
|
print()
|
|
print("Downloading most downloaded subtitle:")
|
|
DisplaySelectedSubtitle(selected_file)
|
|
print()
|
|
AutoDownloadAndSave(file, selected_file)
|
|
elif isnumber(options.download):
|
|
search_result = select_search_result_by_id(options.download, search_results)
|
|
AutoDownloadAndSave(file, search_result)
|
|
else:
|
|
raise Exception("internal error: bad option.download=%s" % options.download)
|
|
|
|
if no_search_results > 0:
|
|
raise SystemExit("One or more subtitles were not found.")
|
|
|
|
main(sys.argv[1:])
|