Source code for podgen.media
# -*- coding: utf-8 -*-
"""
podgen.media
~~~~~~~~~~~~
This file contains the Media class, which represents a pointer to a media
file.
:copyright: 2016, Thorben Dahl <thorben@sjostrom.no>
:license: FreeBSD and LGPL, see license.* for more details.
"""
# Support for Python 2.7
from __future__ import absolute_import, division, print_function, unicode_literals
from builtins import *
from future.moves.urllib.parse import urlparse
from future.utils import raise_from
import os
import tempfile
import warnings
import datetime
from tinytag import TinyTag
import requests
from podgen.not_supported_by_itunes_warning import NotSupportedByItunesWarning
from podgen import version
def _get_new_requests_session():
# TODO: Change into condition about requests' version once bug is fixed
if False:
requests_session = requests.Session()
requests_session.headers['User-Agent'] = "%s v%s" % \
(version.name, version.version_full_str)
else:
# Currently work-around for bug in requests
# See #3421 (https://github.com/kennethreitz/requests/issues/3421)
requests_session = requests
return requests_session
[docs]class Media(object):
"""
Data-oriented class representing a pointer to a media file.
A media file can be a sound file (most typical), video file or a document.
You should provide the absolute URL at which this media can be found, and
the media's file size in bytes.
Optionally, you can provide the type of media (expressed using MIME types).
When not given in the constructor, it will be found automatically by looking
at the url's file extension. If the url's file extension isn't supported by
iTunes, you will get an error if you don't supply the type.
You are also highly encouraged to provide the duration of the media.
.. note::
iTunes is lazy and will just look at the URL to figure out if
a file is of a supported file type. You must therefore ensure your URL
ends with a supported file extension.
.. note::
A warning called :class:`~podgen.NotSupportedByItunesWarning`
will be issued if your URL or type isn't compatible with iTunes. See
the Python documentation for more details on :mod:`warnings`.
Media types supported by iTunes:
* Audio
* M4A
* MP3
* Video
* MOV
* MP4
* M4V
* Document
* PDF
* EPUB
All attributes will always have a value, except size which can be 0 if the
size cannot be determined by any means (eg. if it's a stream) and duration
which is optional (but recommended).
.. seealso::
:ref:`podgen.Media-guide`
for a more gentle introduction.
"""
file_types = {
'm4a': 'audio/x-m4a',
'mp3': 'audio/mpeg',
'mov': 'video/quicktime',
'mp4': 'video/mp4',
'm4v': 'video/x-m4v',
'pdf': 'application/pdf',
'epub': 'document/x-epub',
}
def __init__(self, url, size=0, type=None, duration=None,
requests_session=None):
self._url = None
self._size = None
self._type = None
self._duration = None
self.url = url
self.size = size
self.type = type or self.get_type(url)
self.duration = duration
self.requests_session = requests_session or _get_new_requests_session()
"""The requests.Session object which shall be used. Defaults to a new
session with PodGen as User-Agent.
This is used by the instance methods :meth:`~.Media.download` and
:meth:`~.Media.fetch_duration`.
:meth:`~.Media.create_from_server_response`, however, creates its own
requests Session if not given as a parameter (since it is a static
method).
You can set this attribute manually to set your own User-Agent and
benefit from Keep-Alive across different instances of Media.
:type: :class:`requests.Session`
"""
@property
def url(self):
"""The URL at which this media is publicly accessible.
Only absolute URLs are allowed, so make sure it starts with http:// or
https://. The server should support HEAD-requests and byte-range
requests.
Ensure you quote parts of the URL that are not supposed to carry any
special meaning to the browser, typically the name of your file.
Common offenders include the slash character when not used to separate
folders, the hash mark (#) and the question mark (?). Use
:func:`urllib.parse.quote` in Python3 and :func:`urllib.quote` in
Python2.
:type: :obj:`str`
"""
return self._url
@url.setter
def url(self, url):
if not url:
raise ValueError("url cannot be empty or None")
parsed_url = urlparse(url)
file_extension = parsed_url.path.split('.')[-1].lower()
if file_extension not in self.file_types:
warnings.warn("File extension %s is not supported by iTunes."
% file_extension, NotSupportedByItunesWarning,
stacklevel=2)
if parsed_url.scheme not in ("http", "https"):
warnings.warn("URL scheme %s is not supported by iTunes. Make sure "
"you use absolute URLs and HTTP or HTTPS."
% parsed_url.scheme, NotSupportedByItunesWarning,
stacklevel=2)
self._url = url
@property
def file_extension(self):
"""The file extension of :attr:`~.Media.url`. Read-only.
:type: :obj:`str`
"""
return '.' + urlparse(self.url).path.split('.')[-1]
@property
def size(self):
"""The media's file size in bytes.
You can either provide the number of bytes as an :obj:`int`, or you can
provide a human-readable :obj:`str` with a unit, like MB or GiB.
An unknown size is represented as 0. This should ONLY be used in
exceptional cases, where it is theoretically impossible to determine
the file size (for example if it's a stream). Setting the size to 0
will issue a UserWarning.
:type: :obj:`str` (which will be converted to and stored as :obj:`int`)
or :obj:`int`
.. note::
If you provide a string, it will be translated to int when the
assignment happens. Thus, on subsequent accesses, you will get the
resulting int, not the string you put in.
.. note::
The units are case-insensitive. This means that the ``B`` is
always assumed to mean "bytes", even if it is lowercase (``b``).
Likewise, ``m`` is taken to mean mega, not milli.
"""
return self._size
@size.setter
def size(self, size):
try:
size = int(size)
if size < 0:
raise ValueError("File size must be 0 if unknown, or a positive"
" integer.")
self._size = size
if self.size == 0:
warnings.warn("Size is set to 0. This should ONLY be done when "
"there is no possible way to determine the "
"media's size, like if the media is a stream.",
stacklevel=3)
except ValueError:
self.size = self._str_to_bytes(size)
except TypeError as e:
if size is None:
self.size = 0
else:
raise e
@staticmethod
def _str_to_bytes(size):
"""Parse ``size`` and return the number of bytes it names.
See :attr:`.Media.size` for more information on this conversion."""
units = {
"b": 1,
"kb": 1000,
"kib": 1024,
"mb": 1000**2,
"mib": 1024**2,
"gb": 1000**3,
"gib": 1024**3,
"tb": 1000**4,
"tib": 1024**4
}
size = str(size).lower().strip().replace(" ", "")
number = float(size.rstrip("bkimgt"))
unit = size.lstrip("0123456789.")
try:
return round(number * units[unit])
except KeyError:
raise ValueError("The unit %s was not recognized." % unit)
@property
def type(self):
"""The MIME type of this media.
See https://en.wikipedia.org/wiki/Media_type for an introduction.
:type: :obj:`str`
.. note::
If you leave out type when creating a new Media object, the
type will be auto-detected from the :attr:`~podgen.Media.url`
attribute. However, this won't happen automatically other than
during initialization. If you want to autodetect type when
assigning a new value to url, you should use
:meth:`~podgen.Media.get_type`.
"""
return self._type
@type.setter
def type(self, type):
if not type:
raise ValueError("Type cannot be empty or None")
type = type.strip().lower()
if type not in self.file_types.values():
warnings.warn("Media type %s is not supported by iTunes." % type,
NotSupportedByItunesWarning, stacklevel=2)
self._type = type
[docs] def get_type(self, url):
"""Guess the MIME type from the URL.
This is used to fill in :attr:`~.Media.type` when it is not given (and
thus called implicitly by the constructor), but you can call it
yourself.
Example::
>>> from podgen import Media
>>> m = Media("http://example.org/1.mp3", 136532744)
>>> # The type was detected from the url:
>>> m.type
audio/mpeg
>>> # Ops, I changed my mind...
>>> m.url = "https://example.org/1.m4a"
>>> # As you can see, the type didn't change:
>>> m.type
audio/mpeg
>>> # So update type yourself
>>> m.type = m.get_type(m.url)
>>> m.type
audio/x-m4a
:param url: The URL which should be used to guess the MIME type.
:type url: str
:returns: The guessed MIME type.
:raises: ValueError if the MIME type couldn't be guessed from the URL.
"""
file_extension = urlparse(url).path.split(".")[-1].lower()
try:
return self.file_types[file_extension]
except KeyError as e:
raise_from(ValueError(
"The file extension %s was not recognized, which means it's "
"not supported by iTunes. If this is intended, please provide "
"the type yourself so clients can see what type of file it is."
% file_extension), e)
@property
def duration(self):
"""The duration of the media file.
:type: :class:`datetime.timedelta`
:raises: :obj:`TypeError` if you try to assign anything other than
:class:`datetime.timedelta` or :obj:`None` to this attribute. Raises
:obj:`ValueError` if a negative timedelta value is given.
"""
return self._duration
@duration.setter
def duration(self, duration):
if duration is None:
self._duration = None
elif not isinstance(duration, datetime.timedelta):
raise TypeError("duration must be a datetime.timedelta instance!")
elif duration.total_seconds() < 0:
raise ValueError("expected a positive timedelta, got %s" % duration)
else:
self._duration = duration
@property
def duration_str(self):
""":attr:`.duration`, formatted as a string according to iTunes' specs.
That is, HH:MM:SS if it lasts more than an hour, or MM:SS if it lasts
less than an hour.
This is just an alternate, read-only view of :attr:`.duration`.
If :attr:`.duration` is :obj:`None`, then this will be :obj:`None` as
well.
:type: :obj:`str`
"""
if self.duration is None:
return None
else:
hours = self.duration.days * 24 + \
self.duration.seconds // 3600
minutes = (self.duration.seconds // 60) % 60
seconds = self.duration.seconds % 60
if hours:
return "%02d:%02d:%02d" % (hours, minutes, seconds)
else:
return "%02d:%02d" % (minutes, seconds)
[docs] @classmethod
def create_from_server_response(cls, url, size=None, type=None,
duration=None, requests_=None):
"""Create new Media object, with size and/or type fetched from the
server when not given.
See :meth:`.Media.fetch_duration` for a (slow!) way to fill in the
duration as well.
Example (assuming the server responds with Content-Length: 252345991 and
Content-Type: audio/mpeg)::
>>> from podgen import Media
>>> # Assume an episode is hosted at example.com
>>> m = Media.create_from_server_response(
... "http://example.com/episodes/ep1.mp3")
>>> m
Media(url=http://example.com/episodes/ep1.mp3, size=252345991,
type=audio/mpeg, duration=None)
:param url: The URL at which the media can be accessed right now.
:type url: str
:param size: Size of the file. Will be fetched from server if not given.
:type size: int or None
:param type: The media type of the file. Will be fetched from server if
not given.
:type type: str or None
:param duration: The media's duration.
:type duration: :class:`datetime.timedelta` or :obj:`None`
:param requests_: Either the
`requests <http://docs.python-requests.org/en/master/>`_ module
itself, or a :class:`requests.Session` object. Defaults to a new
:class:`~requests.Session`.
:type requests_: :mod:`requests` or :class:`requests.Session`
:returns: New instance of Media with url, size and type filled in.
:raises: The appropriate requests exceptions are thrown when networking
errors occur. RuntimeError is thrown if some information isn't
given and isn't found in the server's response."""
if not (size and type):
requests_ = requests_ or _get_new_requests_session()
r = requests_.head(url, allow_redirects=True, timeout=10.0)
r.raise_for_status()
if not size:
try:
size = r.headers['Content-Length']
except KeyError:
raise RuntimeError("Content-Length not returned by server "
"when sending HEAD request to %s" % url)
if not type:
try:
type = r.headers['Content-Type']
except KeyError:
raise RuntimeError("Content-Type header not returned by "
"server when sending HEAD request to %s"
% url)
return Media(url, size, type, duration)
def __str__(self):
return "Media(url=%s, size=%s, type=%s, duration=%s)" % \
(self.url, self.size, self.type, self.duration)
def __repr__(self):
return self.__str__()
def __getstate__(self):
state = self.__dict__.copy()
del state['requests_session']
return state
def __setstate__(self, state):
self.__dict__.update(state)
self.requests_session = _get_new_requests_session()
[docs] def download(self, destination):
"""Download the media file.
This method will block until the file is downloaded in its entirety.
.. note::
The destination will not be populated atomically; if you need this,
you must give provide a temporary file as destination and rename the
file yourself.
:param destination: Where to save the media file. Either a filename,
or a file-like object. The file-like object will *not* be closed by
PodGen.
:type destination: :obj:`fd` or :obj:`str`.
"""
r = self.requests_session.get(self.url, stream=True)
r.raise_for_status()
fd = None
destination_is_fd = hasattr(destination, "write")
try:
if destination_is_fd:
fd = destination
else:
fd = open(destination, "wb")
for chunk in r.iter_content(chunk_size=None):
fd.write(chunk)
del chunk
except (Exception, KeyboardInterrupt, InterruptedError):
# Don't leave half-finished files laying around.
if fd and not destination_is_fd:
try:
fd.close()
os.remove(destination)
except FileNotFoundError:
pass
raise
finally:
if fd and not destination_is_fd:
# Close the file we've opened (doesn't hurt to close twice)
fd.close()
[docs] def populate_duration_from(self, filename):
"""Populate :attr:`.Media.duration` by analyzing the given file.
Use this method when you have the media file on the local file system.
Use :meth:`.Media.fetch_duration` if you need to download the file from
the server.
:param filename: Path to the media file which shall be used to determine
this media's duration. The file extension must match its file type,
since it is used to determine what type of media file it is. For
a list of supported formats, see
https://pypi.python.org/pypi/tinytag/
:type filename: str
"""
self.duration = self._get_duration_of(filename)
@staticmethod
def _get_duration_of(filename):
"""Return the duration of the media file located at ``filename``.
Use :meth:`.Media.populate_duration_from` if you want to populate the
duration property of a Media instance using a local file.
:param filename: Path to the media file which shall be used to determine
this media's duration. The file extension must match its file type,
since it is used to determine what type of media file it is. For
a list of supported formats, see
https://pypi.python.org/pypi/tinytag/
:type filename: str
:returns: datetime.timedelta
"""
return datetime.timedelta(seconds=TinyTag.get(filename).duration)
[docs] def fetch_duration(self):
"""Download :attr:`.Media.url` locally and use it to populate
:attr:`.Media.duration`.
Use this method when you don't have the media file on the local file
system. Use :meth:`~.Media.populate_duration_from` otherwise.
This method will take quite some time, since the media file must be
downloaded before it can be analyzed.
"""
filename = None
try:
with tempfile.NamedTemporaryFile(
delete=False, suffix=self.file_extension) as fd:
filename = fd.name
self.download(fd)
self.populate_duration_from(filename)
finally:
if filename:
os.remove(filename)