You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

355 lines
15 KiB

6 months ago
import string
from collections import OrderedDict
from typing import Dict, Literal, Optional, Sequence, Union
from .. import BaseProvider, ElementsType
class Provider(BaseProvider):
    """Implement default file provider for Faker.

    The class-level tuples and mappings below are the pools that the
    generator methods sample from: MIME types grouped by top-level type,
    file extensions grouped by category, per-file-system path rules, and
    Unix block device name prefixes.
    """

    application_mime_types: ElementsType[str] = (
        "application/atom+xml",  # Atom feeds
        # ECMAScript/JavaScript; Defined in RFC 4329 (equivalent to
        # application/javascript but with stricter processing rules)
        "application/ecmascript",
        "application/EDI-X12",  # EDI X12 data; Defined in RFC 1767
        "application/EDIFACT",  # EDI EDIFACT data; Defined in RFC 1767
        "application/json",  # JavaScript Object Notation JSON; Defined in RFC 4627
        # ECMAScript/JavaScript; Defined in RFC 4329 (equivalent to
        # application/ecmascript but with looser processing rules). It is not
        # accepted in IE 8 or earlier - text/javascript is accepted but it is
        # defined as obsolete in RFC 4329. The "type" attribute of the <script>
        # tag in HTML5 is optional and in practice omitting the media type of
        # JavaScript programs is the most interoperable solution since all
        # browsers have always assumed the correct default even before HTML5.
        "application/javascript",
        # Arbitrary binary data. Generally speaking this type identifies files
        # that are not associated with a specific application. Contrary to past
        # assumptions by software packages such as Apache this is not a type
        # that should be applied to unknown files. In such a case, a server or
        # application should not indicate a content type, as it may be
        # incorrect, but rather, should omit the type in order to allow the
        # recipient to guess the type.
        "application/octet-stream",
        "application/ogg",  # Ogg, a multimedia bitstream container format; Defined in RFC 5334
        # Portable Document Format, PDF has been in use for document exchange
        # on the Internet since 1993; Defined in RFC 3778
        "application/pdf",
        "application/postscript",  # PostScript; Defined in RFC 2046
        "application/rdf+xml",  # Resource Description Framework; Defined by RFC 3870
        "application/rss+xml",  # RSS feeds
        "application/soap+xml",  # SOAP; Defined by RFC 3902
        # Web Open Font Format; (candidate recommendation; use
        # application/x-font-woff until standard is official)
        "application/font-woff",
        "application/xhtml+xml",  # XHTML; Defined by RFC 3236
        "application/xml-dtd",  # DTD files; Defined by RFC 3023
        "application/xop+xml",  # XOP
        "application/zip",  # ZIP archive files; Registered
        "application/gzip",  # Gzip, Defined in RFC 6713
    )

    audio_mime_types: ElementsType[str] = (
        "audio/basic",  # mulaw audio at 8 kHz, 1 channel; Defined in RFC 2046
        "audio/L24",  # 24bit Linear PCM audio at 8-48 kHz, 1-N channels; Defined in RFC 3190
        "audio/mp4",  # MP4 audio
        "audio/mpeg",  # MP3 or other MPEG audio; Defined in RFC 3003
        "audio/ogg",  # Ogg Vorbis, Speex, Flac and other audio; Defined in RFC 5334
        "audio/vorbis",  # Vorbis encoded audio; Defined in RFC 5215
        "audio/vnd.rn-realaudio",  # RealAudio; Documented in RealPlayer Help
        "audio/vnd.wave",  # WAV audio; Defined in RFC 2361
        "audio/webm",  # WebM open media format
    )

    image_mime_types: ElementsType[str] = (
        "image/gif",  # GIF image; Defined in RFC 2045 and RFC 2046
        "image/jpeg",  # JPEG JFIF image; Defined in RFC 2045 and RFC 2046
        # JPEG JFIF image; Associated with Internet Explorer; Listed in
        # ms775147(v=vs.85) - Progressive JPEG, initiated before global browser
        # support for progressive JPEGs (Microsoft and Firefox).
        "image/pjpeg",
        "image/png",  # Portable Network Graphics; Registered, Defined in RFC 2083
        "image/svg+xml",  # SVG vector image; Defined in SVG Tiny 1.2 Specification Appendix M
        "image/tiff",  # Tag Image File Format (only for Baseline TIFF); Defined in RFC 3302
        "image/vnd.microsoft.icon",  # ICO image; Registered
    )

    message_mime_types: ElementsType[str] = (
        "message/http",  # Defined in RFC 2616
        "message/imdn+xml",  # IMDN Instant Message Disposition Notification; Defined in RFC 5438
        "message/partial",  # Email; Defined in RFC 2045 and RFC 2046
        # Email; EML files, MIME files, MHT files, MHTML files; Defined in
        # RFC 2045 and RFC 2046
        "message/rfc822",
    )

    model_mime_types: ElementsType[str] = (
        "model/example",  # Defined in RFC 4735
        "model/iges",  # IGS files, IGES files; Defined in RFC 2077
        "model/mesh",  # MSH files, MESH files; Defined in RFC 2077, SILO files
        "model/vrml",  # WRL files, VRML files; Defined in RFC 2077
        "model/x3d+binary",  # X3D ISO standard for representing 3D computer graphics, X3DB binary files
        "model/x3d+vrml",  # X3D ISO standard for representing 3D computer graphics, X3DV VRML files
        "model/x3d+xml",  # X3D ISO standard for representing 3D computer graphics, X3D XML files
    )

    multipart_mime_types: ElementsType[str] = (
        "multipart/mixed",  # MIME Email; Defined in RFC 2045 and RFC 2046
        "multipart/alternative",  # MIME Email; Defined in RFC 2045 and RFC 2046
        "multipart/related",  # MIME Email; Defined in RFC 2387 and used by MHTML (HTML mail)
        "multipart/form-data",  # MIME Webform; Defined in RFC 2388
        "multipart/signed",  # Defined in RFC 1847
        "multipart/encrypted",  # Defined in RFC 1847
    )

    text_mime_types: ElementsType[str] = (
        "text/cmd",  # commands; subtype resident in Gecko browsers like Firefox 3.5
        "text/css",  # Cascading Style Sheets; Defined in RFC 2318
        "text/csv",  # Comma-separated values; Defined in RFC 4180
        "text/html",  # HTML; Defined in RFC 2854
        # (Obsolete): JavaScript; Defined in and obsoleted by RFC 4329 in order
        # to discourage its usage in favor of application/javascript. However,
        # text/javascript is allowed in HTML 4 and 5 and, unlike
        # application/javascript, has cross-browser support. The "type"
        # attribute of the <script> tag in HTML5 is optional and there is no
        # need to use it at all since all browsers have always assumed the
        # correct default (even in HTML 4 where it was required by the
        # specification).
        "text/javascript",
        "text/plain",  # Textual data; Defined in RFC 2046 and RFC 3676
        "text/vcard",  # vCard (contact information); Defined in RFC 6350
        "text/xml",  # Extensible Markup Language; Defined in RFC 3023
    )

    video_mime_types: ElementsType[str] = (
        "video/mpeg",  # MPEG-1 video with multiplexed audio; Defined in RFC 2045 and RFC 2046
        "video/mp4",  # MP4 video; Defined in RFC 4337
        "video/ogg",  # Ogg Theora or other video (with audio); Defined in RFC 5334
        "video/quicktime",  # QuickTime video; Registered
        "video/webm",  # WebM Matroska-based open media format
        "video/x-matroska",  # Matroska open media format
        "video/x-ms-wmv",  # Windows Media Video; Documented in Microsoft KB 288102
        "video/x-flv",  # Flash video (FLV files)
    )

    # Category name -> pool of MIME types; the keys are the values accepted
    # by ``mime_type(category=...)``.
    mime_types: Dict[str, ElementsType[str]] = OrderedDict(
        (
            ("application", application_mime_types),
            ("audio", audio_mime_types),
            ("image", image_mime_types),
            ("message", message_mime_types),
            ("model", model_mime_types),
            ("multipart", multipart_mime_types),
            ("text", text_mime_types),
            ("video", video_mime_types),
        )
    )

    audio_file_extensions: ElementsType[str] = (
        "flac",
        "mp3",
        "wav",
    )

    image_file_extensions: ElementsType[str] = (
        "bmp",
        "gif",
        "jpeg",
        "jpg",
        "png",
        "tiff",
    )

    text_file_extensions: ElementsType[str] = (
        "css",
        "csv",
        "html",
        "js",
        "json",
        "txt",
    )

    video_file_extensions: ElementsType[str] = (
        "mp4",
        "avi",
        "mov",
        "webm",
    )

    office_file_extensions: ElementsType[str] = (
        "doc",  # legacy MS Word
        "docx",  # MS Word
        "xls",  # legacy MS Excel
        "xlsx",  # MS Excel
        "ppt",  # legacy MS PowerPoint
        "pptx",  # MS PowerPoint
        "odt",  # LibreOffice document
        "ods",  # LibreOffice spreadsheet
        "odp",  # LibreOffice presentation
        "pages",  # Apple Pages
        "numbers",  # Apple Numbers
        "key",  # Apple Keynote
        "pdf",  # Portable Document Format
    )

    # Category name -> pool of extensions; the keys are the values accepted
    # by ``file_extension(category=...)``.
    file_extensions: Dict[str, ElementsType[str]] = OrderedDict(
        (
            ("audio", audio_file_extensions),
            ("image", image_file_extensions),
            ("office", office_file_extensions),
            ("text", text_file_extensions),
            ("video", video_file_extensions),
        )
    )

    # File system name -> the root prepended to absolute paths and the
    # separator placed between path components by ``file_path``.
    file_systems_path_rules: Dict[str, Dict[str, str]] = {
        "windows": {
            "root": "C:\\",
            "separator": "\\",
        },
        "linux": {
            "root": "/",
            "separator": "/",
        },
    }

    unix_device_prefixes: ElementsType[str] = ("sd", "vd", "xvd")

    def mime_type(self, category: Optional[str] = None) -> str:
        """Generate a mime type under the specified ``category``.

        If ``category`` is ``None`` (or any other falsy value, such as an
        empty string), a random category will be used. The list of valid
        categories include ``'application'``, ``'audio'``, ``'image'``,
        ``'message'``, ``'model'``, ``'multipart'``, ``'text'``, and
        ``'video'``. A non-empty ``category`` that is not a key of
        ``mime_types`` raises ``KeyError``.

        :sample:
        :sample: category='application'
        """
        if not category:
            # Sample from the category names only, not from the mapping itself.
            category = self.random_element(list(self.mime_types.keys()))
        return self.random_element(self.mime_types[category])

    def file_name(self, category: Optional[str] = None, extension: Optional[str] = None) -> str:
        """Generate a random file name with extension.

        If ``extension`` is ``None``, a random extension will be created
        under the hood using |file_extension| with the specified
        ``category``. If a value for ``extension`` is provided, the
        value will be used instead, and ``category`` will be ignored.
        The actual name part itself is generated using |word|. If
        extension is an empty string then no extension will be added,
        and file_name will be the same as |word|.

        :sample: size=10
        :sample: category='audio'
        :sample: extension='abcdef'
        :sample: category='audio', extension='abcdef'
        :sample: extension=''
        """
        if extension is None:
            extension = self.file_extension(category)
        filename: str = self.generator.word()
        # An empty extension means "no extension": do not emit a trailing dot.
        return f"{filename}.{extension}" if extension else filename

    def file_extension(self, category: Optional[str] = None) -> str:
        """Generate a file extension under the specified ``category``.

        If ``category`` is ``None``, a random category will be used. The list of
        valid categories include: ``'audio'``, ``'image'``, ``'office'``,
        ``'text'``, and ``'video'``. A ``category`` that is not a key of
        ``file_extensions`` raises ``KeyError``.

        :sample:
        :sample: category='image'
        """
        if category is None:
            # Sample from the category names only, not from the mapping itself.
            category = self.random_element(list(self.file_extensions.keys()))
        return self.random_element(self.file_extensions[category])

    def file_path(
        self,
        depth: int = 1,
        category: Optional[str] = None,
        extension: Optional[Union[str, Sequence[str]]] = None,
        absolute: Optional[bool] = True,
        file_system_rule: Literal["linux", "windows"] = "linux",
    ) -> str:
        """Generate a pathname to a file.

        This method uses |file_name| under the hood to generate the file
        name itself, and ``depth`` controls the depth of the directory
        path, and |word| is used under the hood to generate the
        different directory names.

        If ``absolute`` is ``True`` (default), the generated path starts
        with the root of the selected file system (``/`` for ``'linux'``,
        ``C:\\`` for ``'windows'``) and is absolute. Otherwise, the
        generated path is relative.

        If used, ``extension`` can be either a string, forcing that
        extension, a sequence of strings (one will be picked at random),
        or an empty sequence (the path will have no extension). Default
        behaviour is the same as |file_name|

        ``file_system_rule`` (default ``'linux'``) selects the file system
        path standard used for the root and the separator of the generated
        path. The list of valid file systems include: ``'windows'``,
        ``'linux'``. Any other value raises ``TypeError``.

        :sample: size=10
        :sample: depth=3
        :sample: depth=5, category='video'
        :sample: depth=5, category='video', extension='abcdef'
        :sample: extension=[]
        :sample: extension=''
        :sample: extension=["a", "bc", "def"]
        :sample: depth=5, category='video', extension='abcdef', file_system_rule='windows'
        """
        # Collapse a sequence of candidate extensions into a single string;
        # an empty sequence means "no extension".
        if extension is not None and not isinstance(extension, str):
            extension = self.random_element(extension) if len(extension) else ""

        fs_rule = self.file_systems_path_rules.get(file_system_rule)
        if not fs_rule:
            raise TypeError("Specified file system is invalid.")
        root = fs_rule["root"]
        separator = fs_rule["separator"]

        # The file name is drawn first, then one directory name per level is
        # prepended, so the last word drawn becomes the outermost directory.
        path: str = self.file_name(category, extension)
        for _ in range(depth):
            path = f"{self.generator.word()}{separator}{path}"
        return root + path if absolute else path

    def unix_device(self, prefix: Optional[str] = None) -> str:
        """Generate a Unix device file name.

        If ``prefix`` is ``None``, a random prefix will be used. The list of
        valid prefixes include: ``'sd'``, ``'vd'``, and ``'xvd'``. Any other
        ``prefix`` value is used as given. The device name is the prefix
        followed by one random lowercase ASCII letter, under ``/dev/``.

        :sample:
        :sample: prefix='mmcblk'
        """
        if prefix is None:
            prefix = self.random_element(self.unix_device_prefixes)
        suffix: str = self.random_element(string.ascii_lowercase)
        return f"/dev/{prefix}{suffix}"

    def unix_partition(self, prefix: Optional[str] = None) -> str:
        """Generate a Unix partition name.

        This method uses |unix_device| under the hood to create a device file
        name with the specified ``prefix``, then appends a single random
        digit to it.

        :sample:
        :sample: prefix='mmcblk'
        """
        device: str = self.unix_device(prefix=prefix)
        return device + str(self.random_digit())