diff --git a/week7/Triangles.java b/week7/Triangles.java
index 0123c58..8e580be 100644
--- a/week7/Triangles.java
+++ b/week7/Triangles.java
@@ -11,7 +11,7 @@ public class Triangles {

         if (order > 1)
             // vertical alignment to center it properly within the root triangle
-            recurse(order, 0.5, (height(1)) / 2 - 0.15, 0.5);
+            recurse(order - 1, 0.5, (height(1)) / 2 - 0.15, 0.5);
         StdDraw.show();
     }

diff --git a/week9/HtmlItem.java b/week9/HtmlItem.java
new file mode 100644
index 0000000..4ba6097
--- /dev/null
+++ b/week9/HtmlItem.java
@@ -0,0 +1,57 @@
+/**
+ * A single node of parsed HTML content: either a run of text or a nested tag.
+ * Create instances with {@link #tag(HtmlTag)} or {@link #text(String)}; exactly
+ * one of {@code textContent} / {@code tagContent} is set, as told by {@code type}.
+ */
+public class HtmlItem {
+    /** Tells which of the two content fields is populated. */
+    public enum HtmlItemType {
+        Text,
+        Tag
+    }
+
+    HtmlItemType type;
+    String textContent;
+    HtmlTag tagContent;
+
+    /** Wraps a parsed tag as an item of type {@code Tag}. */
+    public static HtmlItem tag(HtmlTag tag) {
+        var item = new HtmlItem();
+        item.type = HtmlItemType.Tag;
+        item.tagContent = tag;
+        return item;
+    }
+
+    /** Wraps a run of text as an item of type {@code Text}. */
+    public static HtmlItem text(String content) {
+        var item = new HtmlItem();
+        item.type = HtmlItemType.Text;
+        item.textContent = content;
+        return item;
+    }
+
+    /** Returns the wrapped tag; the type is only checked when assertions are enabled. */
+    public HtmlTag getTag() {
+        assert this.type == HtmlItemType.Tag;
+        return this.tagContent;
+    }
+
+    /** Returns the wrapped text; the type is only checked when assertions are enabled. */
+    public String getText() {
+        assert this.type == HtmlItemType.Text;
+        return this.textContent;
+    }
+
+    @Override
+    public String toString() {
+        // Case labels are unqualified: a qualified enum constant in a case
+        // label does not compile before Java 21.
+        switch (this.type) {
+            case Tag:
+                return this.tagContent.toString();
+            case Text:
+                return this.textContent;
+        }
+        return null;
+    }
+}
diff --git a/week9/HtmlSelector.java b/week9/HtmlSelector.java
new file mode 100644
index 0000000..22d3e81
--- /dev/null
+++ b/week9/HtmlSelector.java
@@ -0,0 +1,106 @@
+import java.util.ArrayList;
+import java.util.Arrays;
+
+/**
+ * A descendant selector: space-separated steps, each an optional tag name
+ * followed by zero or more {@code .class} suffixes (for example
+ * {@code ".table tr"} or {@code "div.a.b"}).
+ */
+public class HtmlSelector {
+    /** One selector step: an optional tag name plus optional required classes. */
+    static class HtmlSelectorItem {
+        final String tagName;
+        final String[] classNames;
+
+        HtmlSelectorItem(String tagName, String[] classNames) {
+            this.tagName = tagName;
+            this.classNames = classNames;
+        }
+
+        /**
+         * Parses one step such as {@code div}, {@code .a} or {@code div.a.b}.
+         * A {@code null} tag name or class list means "no constraint", so an
+         * empty step matches every tag.
+         */
+        static HtmlSelectorItem parse(String item) {
+            if (item.length() == 0)
+                return new HtmlSelectorItem(null, null);
+            // String.split takes a regex: an unescaped "." matches every
+            // character and always produced an empty array, so "div.a" was
+            // never split. The -1 limit keeps trailing empty pieces so that
+            // sectors[0] exists even for input like ".".
+            var sectors = item.split("\\.", -1);
+            var classes = new ArrayList<String>();
+            for (var i = 1; i < sectors.length; i++) {
+                if (!sectors[i].isEmpty())
+                    classes.add(sectors[i]);
+            }
+            // matches() treats null as "no class constraint"; an empty array
+            // would wrongly reject tags that have no class attribute.
+            return new HtmlSelectorItem(
+                    sectors[0].isEmpty() ? null : sectors[0],
+                    classes.isEmpty() ? null : classes.toArray(new String[0]));
+        }
+
+        /** Returns true when the tag satisfies the name and every required class (both ignoring case). */
+        boolean matches(HtmlTag tag) {
+            if (this.tagName != null && !this.tagName.equalsIgnoreCase(tag.name))
+                return false;
+            if (classNames != null) {
+                var classes = tag.className();
+                if (classes == null)
+                    return false;
+                for (var cn : classNames) {
+                    var found = false;
+                    for (var tcn : classes)
+                        if (cn.equalsIgnoreCase(tcn)) {
+                            found = true;
+                            break;
+                        }
+                    if (!found)
+                        return false;
+                }
+            }
+            return true;
+        }
+
+        @Override
+        public String toString() {
+            return String.format("{name: %s, classes: %s}", tagName, Arrays.toString(classNames));
+        }
+    }
+
+    final HtmlSelectorItem[] items;
+
+    /** Parses a space-separated selector; blank tokens from repeated spaces are skipped. */
+    HtmlSelector(String selector) {
+        var res = new ArrayList<HtmlSelectorItem>();
+        for (var item : selector.split(" ")) {
+            if (item.isEmpty())
+                continue;
+            res.add(HtmlSelectorItem.parse(item));
+        }
+        this.items = res.toArray(new HtmlSelectorItem[res.size()]);
+    }
+
+    HtmlSelector(HtmlSelectorItem[] items) {
+        this.items = items;
+    }
+
+    /** Returns the selector without its first step; an empty selector stays empty. */
+    HtmlSelector inner() {
+        if (this.items.length == 0)
+            return this;
+        return new HtmlSelector(Arrays.copyOfRange(this.items, 1, this.items.length));
+    }
+
+    /** Returns the first step, or null when no steps are left (HtmlTag.matches accepts null). */
+    HtmlSelectorItem current() {
+        return this.items.length == 0 ? null : this.items[0];
+    }
+
+    /** Returns the number of remaining steps. */
+    int length() {
+        return this.items.length;
+    }
+}
diff --git a/week9/HtmlTag.java b/week9/HtmlTag.java
new file mode 100644
index 0000000..dd1a300
--- /dev/null
+++ b/week9/HtmlTag.java
@@ -0,0 +1,97 @@
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.NoSuchElementException;
+
+/** A parsed HTML element: tag name, attributes and child items. */
+public class HtmlTag {
+    public final String name;
+    public final HashMap<String, String> params;
+    public final HtmlItem[] kids;
+
+    HtmlTag(String name, HashMap<String, String> params, HtmlItem[] kids) {
+        this.name = name;
+        this.params = params;
+        this.kids = kids;
+    }
+
+    /**
+     * Concatenates the text of all descendants, appends one space and collapses
+     * every whitespace run into a single space. Callers trim the result if needed.
+     */
+    public String getInnerText() {
+        var text = new StringBuilder();
+        for (var item : kids) {
+            switch (item.type) {
+                case Text:
+                    text.append(item.getText());
+                    break;
+                case Tag:
+                    text.append(item.getTag().getInnerText());
+                    break;
+            }
+        }
+        text.append(' ');
+        return text.toString().replaceAll("\\s+", " ");
+    }
+
+    /** Returns the space-separated values of the class attribute, or null when it is absent. */
+    public String[] className() {
+        var classNames = this.params.get("class");
+        if (classNames == null)
+            return null;
+        return classNames.split(" ");
+    }
+
+    /** Returns true when this tag satisfies the step; a null step matches everything. */
+    public boolean matches(HtmlSelector.HtmlSelectorItem item) {
+        if (item == null)
+            return true;
+        return item.matches(this);
+    }
+
+    /** Returns the tags matched by the selector, searching this tag and its descendants. */
+    public HtmlTag[] getElements(String selector) {
+        var results = new ArrayList<HtmlTag>();
+        var htmlselector = new HtmlSelector(selector);
+        getElements(htmlselector, results);
+        return results.toArray(new HtmlTag[results.size()]);
+    }
+
+    void getElements(HtmlSelector selector, ArrayList<HtmlTag> results) {
+        var current = selector.current();
+        if (matches(current)) {
+            // these two ifs result in the algorithm stopping at the last selector
+            // but if no selector is provided, all elements will be returned
+            if (selector.length() < 2)
+                results.add(this);
+            if (selector.length() == 1)
+                return;
+            selector = selector.inner();
+        }
+        for (var kid : kids) {
+            if (kid.type != HtmlItem.HtmlItemType.Tag)
+                continue;
+            kid.getTag().getElements(selector, results);
+        }
+    }
+
+    /**
+     * Returns the first tag matched by the selector.
+     *
+     * @throws NoSuchElementException when nothing matches
+     */
+    public HtmlTag getElement(String selector) {
+        var found = getElements(selector);
+        if (found.length == 0)
+            throw new NoSuchElementException("No element matches selector: " + selector);
+        return found[0];
+    }
+
+    @Override
+    public String toString() {
+        // NOTE(review): the closing "</%s>" is reconstructed from the otherwise
+        // unused fourth argument - confirm against the intended output.
+        return String.format("<%s params: %s> %s </%s>", name, params, Arrays.toString(kids), name);
+    }
+}
diff --git a/week9/UglierSoup.java b/week9/UglierSoup.java
new file mode 100644
index 0000000..f1dc693
--- /dev/null
+++ b/week9/UglierSoup.java
@@ -0,0 +1,272 @@
+import java.nio.CharBuffer;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.regex.MatchResult;
+import java.util.regex.Pattern;
+
+import common.In;
+
+/**
+ * A minimal HTML parser. The constructor parses the given content into
+ * {@link #data}; nested tags are parsed by child instances that share the
+ * same {@link CharBuffer} and therefore the same read position.
+ */
+public class UglierSoup {
+    public static void main(String[] args) {
+        // var in = new In("./test.html");
+        // var content = in.readAll();
+        // NOTE(review): the markup of this sample looks like it was stripped,
+        // only whitespace and text are left - restore it from ./test.html.
+        var content = "\n" + //
+                "\t\t\t\t\n" + //
+                "\t\t\t\t\t\n" + //
+                "\t\t\t\t\t\t9.4°\n" + //
+                "\t\t\t\t\t\n" + //
+                "\t\t\t\t\t\n" + //
+                "\t\t\t\t\t\tAskov\n" + //
+                "\t\t\t\t\t\t\n" + //
+                "\t\t\t\t\t\t\n" + //
+                "\t\t\t\t\t\tFor 70 minutter siden, 103 km\n" + //
+                "\t\t\t\t\t\t\n" + //
+                "\t\t\t\t\t\t\n" + //
+                "\t\t\t\t\t\t\n" + //
+                "\t\t\t\t\t\n" + //
+                "\n" + //
+                "\t\t\t\t\t\n" + //
+                "\t\t\t\t\t\t\t3 m/s\n" + //
+                "\t\t\t\t\t\t\t\n" + //
+                "\t\t\t\t\t\t\t\t\t\t\t\t\n" + //
+                "\t\t\t\t\t\n" + //
+                "\t\t\t\t\t\n" + //
+                "\t\t\t\t\t\t\t\"wind\n" + //
+                "\t\t\t\t\t\n" + //
+                "\n" + //
+                "\t\t\t\t";
+        var parser = new UglierSoup(content);
+        System.out.println(parser.data.toString());
+    }
+
+    final CharBuffer content;
+    final ArrayList<HtmlItem> data;
+
+    UglierSoup(String content) {
+        this.content = CharBuffer.wrap(content);
+        this.data = loop();
+    }
+
+    /** Parses from the buffer's current position, advancing it as input is consumed. */
+    UglierSoup(CharBuffer content) {
+        this.content = content;
+        this.data = loop();
+    }
+
+    /** Returns the tags matched by the selector, searching every top-level tag. */
+    public HtmlTag[] getElements(String selector) {
+        var results = new ArrayList<HtmlTag>();
+        var htmlselector = new HtmlSelector(selector);
+        getElements(htmlselector, results);
+        return results.toArray(new HtmlTag[results.size()]);
+    }
+
+    void getElements(HtmlSelector selector, ArrayList<HtmlTag> results) {
+        for (var kid : data) {
+            if (kid.type != HtmlItem.HtmlItemType.Tag)
+                continue;
+            kid.getTag().getElements(selector, results);
+        }
+    }
+
+    // script should also have separate handling to completely skip contents
+    // like with comments, as HTML can be present within script tags
+    // Besides !doctype and script this lists the HTML void elements, which
+    // never have children or an end tag.
+    final static String[] selfClosingTags = { "!doctype", "area", "base", "br", "col", "embed", "hr", "img",
+            "input", "link", "meta", "source", "track", "wbr", "script" };
+
+    final static Pattern whitespace = Pattern.compile("^\\s+");
+    // NOTE(review): this pattern and scriptEnd were empty apart from the "^"
+    // anchor; "<!-- ... -->" and "</script>" are reconstructed from how the
+    // two patterns are used below - confirm.
+    final static Pattern comment = Pattern.compile("^<!--.*?-->", Pattern.DOTALL);
+    final static Pattern identifier = Pattern.compile("^[!a-zA-Z0-9_][a-zA-Z0-9_-]*");
+    final static Pattern paramContents = Pattern.compile("^(\"[^\"]*\"|'[^']*'|[a-zA-Z0-9_][a-zA-Z0-9_-]*)");
+    final static Pattern textContent = Pattern.compile("^[^<]+");
+    final static Pattern scriptEnd = Pattern.compile("</script>", Pattern.CASE_INSENSITIVE);
+    final static Pattern htmlEntities = Pattern.compile("&([a-zA-Z0-9_-]+);");
+
+    /** Returns true for tags whose children must not be parsed (case-insensitive). */
+    boolean shouldSkipKids(String name) {
+        var lowercase = name.toLowerCase(Locale.ROOT);
+
+        for (var item : selfClosingTags) {
+            if (item.equals(lowercase))
+                return true;
+        }
+
+        return false;
+    }
+
+    void advance() {
+        advance(1);
+    }
+
+    void advance(int num) {
+        content.position(content.position() + num);
+    }
+
+    /** Returns true when input remains and its next character is {@code c}. */
+    boolean nextIs(char c) {
+        return content.remaining() > 0 && content.charAt(0) == c;
+    }
+
+    /** Advances to the end of the first match of the pattern, if there is one. */
+    void skipPattern(Pattern pat) {
+        var wsmatcher = pat.matcher(content);
+        if (!wsmatcher.find())
+            return;
+        advance(wsmatcher.end());
+    }
+
+    /** Strips the surrounding quotes of an attribute value and decodes entities. */
+    String cleanupParamValue(String value) {
+        // paramContents accepts both quote styles, so both have to be removed
+        if (value.length() >= 2 && (value.charAt(0) == '"' || value.charAt(0) == '\'')) {
+            value = value.substring(1, value.length() - 1);
+        }
+
+        return handleHtmlEntities(value);
+    }
+
+    /** Replaces the known named entities; unknown ones are left untouched. */
+    String handleHtmlEntities(String value) {
+        return htmlEntities.matcher(value).replaceAll((MatchResult res) -> {
+            var name = res.group(1).toLowerCase(Locale.ROOT);
+            switch (name) {
+                case "deg":
+                    return "°";
+            }
+            return res.group();
+        });
+    }
+
+    /** Consumes text up to the next {@code <}; returns null when there is none. */
+    String parseText() {
+        var matcher = textContent.matcher(content);
+        if (!matcher.find())
+            return null;
+        var text = matcher.group();
+        advance(matcher.end());
+        return handleHtmlEntities(text);
+    }
+
+    /**
+     * Parses one tag with its attributes and, unless it is self-closing, its
+     * children. Returns null when the input does not start with a tag name.
+     */
+    HtmlTag parseTag() {
+        if (!nextIs('<'))
+            return null;
+        advance();
+        skipPattern(whitespace);
+        var nameMatcher = identifier.matcher(content);
+        if (!nameMatcher.find())
+            return null;
+        var name = nameMatcher.group();
+        advance(nameMatcher.end());
+        var params = new HashMap<String, String>();
+        while (true) {
+            skipPattern(whitespace);
+            if (nextIs('/'))
+                advance();
+            // stop at the end of the tag, or of the input when it is truncated
+            if (content.remaining() == 0 || nextIs('>'))
+                break;
+            var identMatcher = identifier.matcher(content);
+            if (!identMatcher.find())
+                break;
+            var paramName = identMatcher.group();
+            advance(identMatcher.end());
+            var paramContent = "";
+            if (nextIs('=')) {
+                advance();
+                var contentMatcher = paramContents.matcher(content);
+                if (contentMatcher.find()) {
+                    paramContent = cleanupParamValue(contentMatcher.group());
+                    advance(contentMatcher.end());
+                }
+            }
+            params.put(paramName, paramContent);
+        }
+        // consume the closing '>' (or the character the attribute loop gave up on)
+        if (content.remaining() > 0)
+            advance();
+        ArrayList<HtmlItem> kids = null;
+        // shouldSkipKids completely omits content including ending tags
+        // for script we need to skip content until ending tag.
+        // use regex for quick lookahead.
+        if (name.equalsIgnoreCase("script")) {
+            var scriptMatcher = scriptEnd.matcher(content);
+            if (!scriptMatcher.find())
+                throw new IllegalStateException("Script doesn't have end tag?");
+            advance(scriptMatcher.end());
+        } else if (!shouldSkipKids(name)) {
+            var kidParser = new UglierSoup(content);
+            kids = kidParser.data;
+        }
+
+        return new HtmlTag(name, params, kids == null ? new HtmlItem[0] : kids.toArray(new HtmlItem[kids.size()]));
+    }
+
+    // this is wrong behaviour
+    // ideally this should check the closing element's name and otherwise go up the chain
+    // so that it 'heals' if an element is not closed before its parent is
+    // this completely ignores the name of an element and just closes the current one
+    // and depends on valid HTML being submitted (with only the few self closing tags defined above)
+    boolean detectClosingTag() {
+        // a lone trailing '<' is not a closing tag; do not read past the end
+        if (content.remaining() < 2 || content.charAt(0) != '<' || content.charAt(1) != '/')
+            return false;
+        advance(2);
+        var identMatcher = identifier.matcher(content);
+        if (!identMatcher.find())
+            throw new IllegalStateException("Ending tag without tag name?");
+        advance(identMatcher.end());
+        skipPattern(whitespace);
+        if (!nextIs('>'))
+            throw new IllegalStateException("Ending tag without > after whitespace");
+        advance();
+        return true;
+    }
+
+    /** Parses items until the input ends, a closing tag is consumed, or nothing can be parsed. */
+    ArrayList<HtmlItem> loop() {
+        var data = new ArrayList<HtmlItem>();
+        while (true) {
+            skipPattern(whitespace);
+            var lastPos = content.position();
+            while (true) {
+                skipPattern(comment);
+                skipPattern(whitespace);
+                if (content.position() == lastPos)
+                    break;
+                lastPos = content.position();
+            }
+            if (content.remaining() == 0)
+                break;
+            if (detectClosingTag())
+                break;
+            var tag = parseTag();
+            if (tag != null) {
+                data.add(HtmlItem.tag(tag));
+            } else {
+                var text = parseText();
+                if (text != null) {
+                    data.add(HtmlItem.text(text));
+                } else
+                    break;
+            }
+        }
+        return data;
+    }
+}
diff --git a/week9/Weather.java b/week9/Weather.java
new file mode 100644
index 0000000..9b654ec
--- /dev/null
+++ b/week9/Weather.java
@@ -0,0 +1,58 @@
+import java.io.FileDescriptor;
+import java.io.FileOutputStream;
+import java.io.PrintStream;
+import java.nio.charset.StandardCharsets;
+
+import common.In;
+
+/** Prints a table of nearby weather observations for the city given as the only argument. */
+public class Weather {
+    public static void main(String[] args) {
+        try {
+            // this wasn't needed on my main NixOS system but was needed on SteamOS
+            // without this it just printed ? for utf-8 characters
+            System.setOut(new PrintStream(new FileOutputStream(FileDescriptor.out), true, StandardCharsets.UTF_8));
+        } catch (SecurityException ignored) {
+            // best effort: keep the default stdout when it may not be replaced
+        }
+
+        if (args.length != 1) {
+            System.out.println("Missing argument for city name");
+            return;
+        }
+        String url = String.format("https://www.flotvejr.dk/%s/observations", args[0]);
+        // String url = "./test.html";
+        String source = new In(url).readAll();
+        UglierSoup soup = new UglierSoup(source);
+
+        System.out.println("╭────────────┬─────────────────────────────┬────────────╮");
+        var isFirst = true;
+        for (var row : soup.getElements(".nearby-observations-table tr")) {
+            if (isFirst)
+                isFirst = false;
+            else
+                System.out.println("├────────────┼─────────────────────────────┼────────────┤");
+            var temperature = row.getElement(".nearby-observations-temperature").getInnerText().trim();
+            var location = row.getElement(".nobr a").getInnerText().trim();
+            // presumably the text looks like "For 70 minutter siden, 103 km":
+            // minutes are the second word, kilometres the fifth
+            var observation = row.getElement(".observation_ago").getInnerText().trim().split(" ");
+            var time = word(observation, 1) + " mins ago";
+            var place = word(observation, 4) + " km away";
+            var nobrs = row.getElements(".nobr");
+            var windspeed = nobrs[nobrs.length - 1].getInnerText().trim();
+            // NOTE(review): the 12 spaces in the empty outer cells of the second
+            // row are reconstructed from the border widths above - confirm.
+            System.out.printf(
+                    "│ %-10s │ %-27s │ %-10s │\n│            │ %-12s - %-12s │            │\n",
+                    temperature, location, windspeed, time, place);
+        }
+
+        System.out.println("╰────────────┴─────────────────────────────┴────────────╯");
+    }
+
+    /** Returns the word at the index, or "?" when the text has fewer words than expected. */
+    private static String word(String[] words, int index) {
+        return index < words.length ? words[index] : "?";
+    }
+}
diff --git a/week9/doc.typ b/week9/doc.typ
index 2c35ac1..26a0867 100644
--- a/week9/doc.typ
+++ b/week9/doc.typ
@@ -15,6 +15,79 @@ your ZIP code will give you a weather forecast.
- You may use city names instead of zip code - Note: Do not overcommit; we are expecting something simple. +#embedClass(name: "Weather") + +Note: Other used classes omitted for clarity in PDF form, see ZIP or git source. Includes a simple HTML parser and tag-name/classes selector queries. + +Example output: + +#[ +#show raw.where(block: true): set par(leading: 2pt) +``` +╭────────────┬─────────────────────────────┬────────────╮ +│ 5.8° │ Oedum │ 1 m/s │ +│ │ 91 mins ago - 16 km away │ │ +├────────────┼─────────────────────────────┼────────────┤ +│ 9.1° │ Sletterhage Fyr │ 7 m/s │ +│ │ 91 mins ago - 20 km away │ │ +├────────────┼─────────────────────────────┼────────────┤ +│ 4.7° │ Tirstrup │ 2 m/s │ +│ │ 31 mins ago - 30 km away │ │ +├────────────┼─────────────────────────────┼────────────┤ +│ 4.7° │ Horsens/Bygholm │ 3 m/s │ +│ │ 91 mins ago - 42 km away │ │ +├────────────┼─────────────────────────────┼────────────┤ +│ 5.0° │ Hald V │ 2 m/s │ +│ │ 91 mins ago - 46 km away │ │ +├────────────┼─────────────────────────────┼────────────┤ +│ 9.8° │ Roesnaes │ 9 m/s │ +│ │ 91 mins ago - 61 km away │ │ +├────────────┼─────────────────────────────┼────────────┤ +│ 4.9° │ Isenvad │ 2 m/s │ +│ │ 91 mins ago - 64 km away │ │ +├────────────┼─────────────────────────────┼────────────┤ +│ 9.2° │ Gniben │ 9 m/s │ +│ │ 91 mins ago - 69 km away │ │ +├────────────┼─────────────────────────────┼────────────┤ +│ 5.0° │ Karup │ 2 m/s │ +│ │ 31 mins ago - 69 km away │ │ +├────────────┼─────────────────────────────┼────────────┤ +│ 7.1° │ Odense / Beldringe │ 3 m/s │ +│ │ 31 mins ago - 76 km away │ │ +├────────────┼─────────────────────────────┼────────────┤ +│ 4.8° │ Aars Syd │ 0 m/s │ +│ │ 91 mins ago - 79 km away │ │ +├────────────┼─────────────────────────────┼────────────┤ +│ 5.6° │ Billund Lufthavn │ 1 m/s │ +│ │ 41 mins ago - 81 km away │ │ +├────────────┼─────────────────────────────┼────────────┤ +│ 8.6° │ Aarslev │ 3 m/s │ +│ │ 91 mins ago - 95 km away │ │ 
+├────────────┼─────────────────────────────┼────────────┤ +│ 6.4° │ Vamdrup │ 3 m/s │ +│ │ 31 mins ago - 98 km away │ │ +├────────────┼─────────────────────────────┼────────────┤ +│ 6.5° │ Mejrup │ 2 m/s │ +│ │ 91 mins ago - 99 km away │ │ +├────────────┼─────────────────────────────┼────────────┤ +│ 8.1° │ Borris │ 2 m/s │ +│ │ 91 mins ago - 100 km away │ │ +├────────────┼─────────────────────────────┼────────────┤ +│ 7.8° │ Askov │ 2 m/s │ +│ │ 91 mins ago - 103 km away │ │ +├────────────┼─────────────────────────────┼────────────┤ +│ 9.8° │ Assens/Toroe │ 4 m/s │ +│ │ 91 mins ago - 103 km away │ │ +├────────────┼─────────────────────────────┼────────────┤ +│ 8.7° │ Holbaek │ 5 m/s │ +│ │ 91 mins ago - 106 km away │ │ +├────────────┼─────────────────────────────┼────────────┤ +│ 5.4° │ Aalborg │ 3 m/s │ +│ │ 31 mins ago - 107 km away │ │ +╰────────────┴─────────────────────────────┴────────────╯ +``` +] + == Exercise 9.1 Write a class `Person` to represent a person. The class should have the following fields: `String firstName`, `String lastName`, `int age`, and `Person spouse`. The `spouse` field is initially `null`.