214 lines
7.5 KiB
Python
214 lines
7.5 KiB
Python
from __future__ import unicode_literals
|
|
|
|
import math
|
|
|
|
# ASCII whitespace as defined by the Infra standard:
# https://infra.spec.whatwg.org/#ascii-whitespace
WHITESPACES = (
    "\t",  # U+0009 TAB
    "\n",  # U+000A LF
    "\x0c",  # U+000C FF
    "\r",  # U+000D CR
    " ",  # U+0020 SPACE
)

# States of the descriptor tokenizer used while parsing a srcset candidate.
STATE_IN_DESCRIPTOR, STATE_AFTER_DESCRIPTOR, STATE_IN_PARENS = range(1, 4)
|
|
|
|
|
|
class SRCSet(object):
    """
    Parser and serializer for the value of an HTML ``srcset`` attribute.

    ``raw`` holds the attribute string given to the constructor.
    ``candidates`` is ``None`` until `parse` has run; afterwards it is a list
    of dicts with the keys ``"url"``, ``"w"``, ``"x"`` and ``"h"``. Descriptor
    values are kept as the original strings (without the unit letter) and are
    ``None`` when the descriptor is absent.
    """

    raw = None
    candidates = None

    def __init__(self, string):
        self.raw = string

    def parse(self):
        """
        Based on algorithm from https://html.spec.whatwg.org/multipage/images.html#parse-a-srcset-attribute

        Stores the accepted candidates on ``self.candidates`` and returns the
        same list. Candidates whose descriptors are invalid are dropped.
        A ``None`` raw value is treated as an empty attribute.
        """
        raw = self.raw or ""
        separators = WHITESPACES + (",",)

        # Step 1, 2, 3
        pos = 0
        candidates = []

        while True:
            # Step 4: skip whitespace and commas between candidates
            pos, _ = collect_characters_in(raw, pos, separators)

            # Step 5: end of input is the only way out of the loop
            if pos >= len(raw):
                self.candidates = candidates
                return candidates

            # Step 6: the URL runs up to the next whitespace character.
            # It is never empty because step 4 stopped on a non-separator.
            pos, url = collect_characters_out(raw, pos, WHITESPACES)

            if url.endswith(","):
                # Step 8.1: the comma(s) terminate the candidate, so there
                # are no descriptors to tokenize
                url = url.rstrip(",")
                descriptors = []
            else:
                # Step 8.e.1
                pos, _ = collect_characters_in(raw, pos, WHITESPACES)
                # Step 8.e.2 - 8.e.4
                pos, descriptors = self._collect_descriptors(raw, pos)

            # Step 9 - 15 (descriptor parser)
            parsed = self._interpret_descriptors(descriptors)
            if parsed is not None:
                width, density, height = parsed
                candidates.append(
                    {"url": url, "w": width, "x": density, "h": height}
                )

    @staticmethod
    def _collect_descriptors(raw, pos):
        """
        Tokenize the descriptors of one candidate, starting at `pos`.

        Returns ``(new_pos, descriptors)`` where `new_pos` is just past the
        terminating comma, or at the end of `raw` when there is none.
        Commas and whitespace inside parentheses do not split descriptors.
        """
        descriptors = []
        current = ""
        state = STATE_IN_DESCRIPTOR
        length = len(raw)

        while True:
            # None marks end of input
            cc = raw[pos] if pos < length else None

            if state == STATE_IN_DESCRIPTOR:
                if cc is None or cc == ",":
                    if current:
                        descriptors.append(current)
                    if cc is not None:
                        # consume the comma that ends this candidate
                        pos += 1
                    return pos, descriptors
                if cc in WHITESPACES:
                    if current:
                        descriptors.append(current)
                        current = ""
                        state = STATE_AFTER_DESCRIPTOR
                else:
                    current += cc
                    if cc == "(":
                        state = STATE_IN_PARENS
            elif state == STATE_IN_PARENS:
                if cc is None:
                    # unterminated parenthesis: keep what was collected
                    descriptors.append(current)
                    return pos, descriptors
                current += cc
                if cc == ")":
                    state = STATE_IN_DESCRIPTOR
            else:  # STATE_AFTER_DESCRIPTOR
                if cc is None:
                    return pos, descriptors
                if cc not in WHITESPACES:
                    # reprocess this character as the start of a descriptor
                    state = STATE_IN_DESCRIPTOR
                    continue

            pos += 1

    @classmethod
    def _interpret_descriptors(cls, descriptors):
        """
        Validate the descriptors of one candidate.

        Returns a ``(width, density, height)`` tuple of strings (``None`` for
        absent descriptors), or ``None`` when the candidate must be rejected.
        Only ASCII digits are accepted, so the stored values can be written
        back into a srcset attribute unchanged.
        """
        width = density = height = None

        for descriptor in descriptors:
            # a descriptor needs at least one value character plus the unit
            if len(descriptor) < 2:
                return None
            value, unit = descriptor[:-1], descriptor[-1]

            if unit == "w":
                if width is not None or density is not None:
                    return None
                # zero (any run of "0") is not a usable width
                if not cls._is_ascii_digits(value) or not value.strip("0"):
                    return None
                width = value
            elif unit == "x":
                if not (width is None and density is None and height is None):
                    return None
                if not cls._is_valid_float(value):
                    return None
                number = float(value)
                # an overflowing literal such as "1e999" becomes infinity
                if number < 0 or math.isinf(number):
                    return None
                density = value
            elif unit == "h":
                if height is not None or density is not None:
                    return None
                if not cls._is_ascii_digits(value) or not value.strip("0"):
                    return None
                height = value
            else:
                return None

        # a height descriptor is only accepted together with a width
        if height is not None and width is None:
            return None
        return width, density, height

    @staticmethod
    def _is_ascii_digits(text):
        """Return True when `text` is a non-empty run of the digits 0-9."""
        return bool(text) and all(ch in "0123456789" for ch in text)

    @classmethod
    def _is_valid_float(cls, text):
        """
        Return True when `text` has the shape ``-?(D+(.D+)?|.D+)([eE][+-]?D+)?``
        with ``D`` an ASCII digit (no leading "+", no trailing ".").
        """
        mantissa, marker, exponent = text.replace("E", "e").partition("e")
        if marker:
            if exponent[:1] in ("+", "-"):
                exponent = exponent[1:]
            if not cls._is_ascii_digits(exponent):
                return False
        if mantissa.startswith("-"):
            mantissa = mantissa[1:]
        whole, dot, fraction = mantissa.partition(".")
        if dot:
            return cls._is_ascii_digits(fraction) and (
                not whole or cls._is_ascii_digits(whole)
            )
        return cls._is_ascii_digits(whole)

    def stringify(self):
        """
        Returns string which is a valid srcset attribute

        Candidates are joined with ", "; each one is its URL followed by the
        descriptors that are set, in the order w, x, h. If `parse` has not
        been called yet it is run first.
        """
        if self.candidates is None:
            self.parse()

        parts = []
        for item in self.candidates:
            chunk = item["url"]
            if item["w"]:
                chunk += " %sw" % item["w"]
            if item["x"]:
                chunk += " %sx" % item["x"]
            if item["h"]:
                chunk += " %sh" % item["h"]
            parts.append(chunk)
        return ", ".join(parts)
|
|
|
|
|
|
def collect_characters_in(string, start, charset):
    """
    Consume the run of characters beginning at `start` that belong to `charset`

    Returns a `(position, collected)` pair: the index of the first character
    that is not in `charset` (or the end of `string`) and the substring that
    was consumed.
    """
    for index in range(start, len(string)):
        if string[index] not in charset:
            return index, string[start:index]
    # Ran off the end; when `start` is already past the end nothing is consumed
    end = max(start, len(string))
    return end, string[start:end]
|
|
|
|
|
|
def collect_characters_out(string, start, charset):
    """
    Consume characters beginning at `start` up to, but not including, the
    first character that belongs to `charset`

    Returns a `(position, collected)` pair: the index of that first matching
    character (or the end of `string`) and the substring that was consumed.
    """
    for index in range(start, len(string)):
        if string[index] in charset:
            return index, string[start:index]
    # No terminator found; when `start` is already past the end nothing is consumed
    end = max(start, len(string))
    return end, string[start:end]
|