fix week 7, solve weather week 9

This commit is contained in:
Daniel Bulant 2025-10-30 22:39:07 +01:00
parent d4f4778a5d
commit ba56e467f0
7 changed files with 557 additions and 1 deletions

View file

@ -11,7 +11,7 @@ public class Triangles {
if (order > 1)
// vertical alignment to center it properly within the root triangle
recurse(order, 0.5, (height(1)) / 2 - 0.15, 0.5);
recurse(order - 1, 0.5, (height(1)) / 2 - 0.15, 0.5);
StdDraw.show();
}

45
week9/HtmlItem.java Normal file
View file

@ -0,0 +1,45 @@
/**
 * One node of a parsed HTML tree: either a run of text or a nested tag.
 *
 * <p>Instances are built through {@link #tag(HtmlTag)} or {@link #text(String)}.
 * The {@code type} field tells which of the two payload fields is populated;
 * the other one stays {@code null}.
 */
public class HtmlItem {
    /** Discriminates between the two kinds of node. */
    public enum HtmlItemType {
        Text,
        Tag
    }

    /** Kind of this node; decides which payload field below is meaningful. */
    HtmlItemType type;
    /** Payload of a {@code Text} node. */
    String textContent;
    /** Payload of a {@code Tag} node. */
    HtmlTag tagContent;

    /** Wraps {@code tag} in a node of type {@code Tag}. */
    public static HtmlItem tag(HtmlTag tag) {
        HtmlItem node = new HtmlItem();
        node.tagContent = tag;
        node.type = HtmlItemType.Tag;
        return node;
    }

    /** Wraps {@code content} in a node of type {@code Text}. */
    public static HtmlItem text(String content) {
        HtmlItem node = new HtmlItem();
        node.textContent = content;
        node.type = HtmlItemType.Text;
        return node;
    }

    /**
     * Returns the wrapped tag. The node kind is only checked by an assertion,
     * i.e. only when the JVM runs with assertions enabled.
     */
    public HtmlTag getTag() {
        assert HtmlItemType.Tag == type;
        return tagContent;
    }

    /**
     * Returns the wrapped text. The node kind is only checked by an assertion,
     * i.e. only when the JVM runs with assertions enabled.
     */
    public String getText() {
        assert HtmlItemType.Text == type;
        return textContent;
    }

    /** Renders the payload: the tag's own string form, or the raw text. */
    @Override
    public String toString() {
        return switch (type) {
            case Tag -> tagContent.toString();
            case Text -> textContent;
        };
    }
}

80
week9/HtmlSelector.java Normal file
View file

@ -0,0 +1,80 @@
import java.util.ArrayList;
import java.util.Arrays;
/**
 * A minimal descendant selector: a whitespace-separated chain of simple
 * selectors, each of the form {@code tag}, {@code .class}, {@code tag.class}
 * or {@code tag.class1.class2}.
 *
 * <p>The chain is consumed front to back: {@link #current()} is the step to
 * match next and {@link #inner()} is the remainder of the chain.
 */
public class HtmlSelector {
    /** One step of the chain: an optional tag name plus optional required classes. */
    static class HtmlSelectorItem {
        /** Required tag name, or {@code null} when any tag is accepted. */
        final String tagName;
        /** Classes that must all be present, or {@code null} when none are required. */
        final String[] classNames;

        HtmlSelectorItem(String tagName, String[] classNames) {
            this.tagName = tagName;
            this.classNames = classNames;
        }

        /**
         * Parses one simple selector such as {@code "td"}, {@code ".nobr"} or
         * {@code "td.nobr.red"}.
         *
         * @param item the selector text; an empty string yields an item that matches every tag
         * @return the parsed item; {@code classNames} is {@code null} when no class was given
         */
        static HtmlSelectorItem parse(String item) {
            if (item.isEmpty()) {
                return new HtmlSelectorItem(null, null);
            }
            // String.split takes a regular expression: an unescaped "." matches every
            // character and always produces an empty array, so the dot must be escaped.
            var sectors = item.split("\\.");
            String tagName = null;
            if (sectors.length > 0 && !sectors[0].isEmpty()) {
                tagName = sectors[0];
            }
            var classes = new ArrayList<String>();
            for (int i = 1; i < sectors.length; i++) {
                // empty pieces come from doubled dots ("a..b") and carry no meaning
                if (!sectors[i].isEmpty()) {
                    classes.add(sectors[i]);
                }
            }
            // null (not an empty array) means "no class constraint"
            return new HtmlSelectorItem(tagName, classes.isEmpty() ? null : classes.toArray(new String[0]));
        }

        /**
         * Tells whether {@code tag} satisfies this step: the tag name must be equal
         * (ignoring case) when one is required, and every required class must appear
         * (ignoring case) among the tag's classes.
         */
        boolean matches(HtmlTag tag) {
            if (tagName != null && !tagName.equalsIgnoreCase(tag.name)) {
                return false;
            }
            if (classNames == null || classNames.length == 0) {
                return true;
            }
            var classes = tag.className();
            if (classes == null) {
                return false;
            }
            for (var wanted : classNames) {
                var found = false;
                for (var actual : classes) {
                    if (wanted.equalsIgnoreCase(actual)) {
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    return false;
                }
            }
            return true;
        }

        @Override
        public String toString() {
            return String.format("{name: %s, classes: %s}", tagName, Arrays.toString(classNames));
        }
    }

    /** Remaining steps of the chain, outermost first. */
    final HtmlSelectorItem[] items;

    /**
     * Parses a whitespace-separated selector chain.
     *
     * @param selector the selector text; a blank selector yields a single step that matches every tag
     */
    HtmlSelector(String selector) {
        var parsed = new ArrayList<HtmlSelectorItem>();
        // Trimming and splitting on whitespace runs keeps repeated or surrounding
        // spaces from producing spurious match-everything steps (or no step at all).
        for (var token : selector.trim().split("\\s+")) {
            parsed.add(HtmlSelectorItem.parse(token));
        }
        this.items = parsed.toArray(new HtmlSelectorItem[0]);
    }

    HtmlSelector(HtmlSelectorItem[] items) {
        this.items = items;
    }

    /** Returns the chain without its first step; must not be called on an empty chain. */
    HtmlSelector inner() {
        return new HtmlSelector(Arrays.copyOfRange(this.items, 1, this.items.length));
    }

    /** Returns the step to match next; must not be called on an empty chain. */
    HtmlSelectorItem current() {
        return this.items[0];
    }

    /** Number of steps left in the chain. */
    int length() {
        return this.items.length;
    }
}

73
week9/HtmlTag.java Normal file
View file

@ -0,0 +1,73 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
/**
 * A parsed HTML element: its tag name, its attributes and its child nodes
 * (text runs and nested tags) in document order.
 */
public class HtmlTag {
    public final String name;
    public final HashMap<String, String> params;
    public final HtmlItem[] kids;

    HtmlTag(String name, HashMap<String, String> params, HtmlItem[] kids) {
        this.name = name;
        this.params = params;
        this.kids = kids;
    }

    /**
     * Concatenates the text of all descendants, appends one trailing space and
     * collapses every run of whitespace into a single space.
     *
     * @return the flattened text; callers that need exact text should trim it
     */
    public String getInnerText() {
        var text = new StringBuilder();
        for (var item : kids) {
            switch (item.type) {
                case Text -> text.append(item.getText());
                case Tag -> text.append(item.getTag().getInnerText());
            }
        }
        // the trailing space keeps the text of adjacent sibling tags from running together
        text.append(' ');
        return text.toString().replaceAll("\\s+", " ");
    }

    /**
     * Returns the individual class names from the {@code class} attribute.
     *
     * @return the class names, or {@code null} when the tag has no {@code class} attribute
     */
    public String[] className() {
        var classNames = this.params.get("class");
        if (classNames == null) {
            return null;
        }
        // class names may be separated by any whitespace, not just one space
        return classNames.trim().split("\\s+");
    }

    /** Tells whether this tag satisfies {@code item}; a {@code null} item matches everything. */
    public boolean matches(HtmlSelector.HtmlSelectorItem item) {
        if (item == null) {
            return true;
        }
        return item.matches(this);
    }

    /**
     * Finds the tags matching {@code selector}, starting the match at this tag itself.
     *
     * @param selector a selector chain as understood by {@link HtmlSelector}
     * @return the matches in document order; empty when nothing matches
     */
    public HtmlTag[] getElements(String selector) {
        var results = new ArrayList<HtmlTag>();
        getElements(new HtmlSelector(selector), results);
        return results.toArray(new HtmlTag[0]);
    }

    /**
     * Recursive worker behind {@link #getElements(String)}.
     *
     * <p>When this tag matches the current step, the step is consumed for the
     * subtree below. When it matches the last step, the tag is collected and the
     * search does not descend into it, so matches nested inside another match are
     * not reported.
     */
    void getElements(HtmlSelector selector, ArrayList<HtmlTag> results) {
        var current = selector.current();
        if (matches(current)) {
            // these two ifs result in the algorithm stopping at the last selector
            // but if no selector is provided, all elements will be returned
            if (selector.length() < 2) {
                results.add(this);
            }
            if (selector.length() == 1) {
                return;
            }
            selector = selector.inner();
        }
        for (var kid : kids) {
            if (kid.type != HtmlItem.HtmlItemType.Tag) {
                continue;
            }
            kid.getTag().getElements(selector, results);
        }
    }

    /**
     * Returns the first tag matching {@code selector}.
     *
     * @throws ArrayIndexOutOfBoundsException when nothing matches
     */
    public HtmlTag getElement(String selector) {
        var found = getElements(selector);
        if (found.length == 0) {
            // same exception type as plain array indexing would raise, but with a usable message
            throw new ArrayIndexOutOfBoundsException(
                    "No element matches selector \"" + selector + "\" inside <" + name + ">");
        }
        return found[0];
    }

    @Override
    public String toString() {
        return String.format("<%s params: %s> %s </%s>", name, params, Arrays.toString(kids), name);
    }
}

239
week9/UglierSoup.java Normal file
View file

@ -0,0 +1,239 @@
import java.nio.CharBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;
import common.In;
/**
 * A small, forgiving HTML parser that turns markup into a tree of
 * {@link HtmlItem}s and answers simple selector queries on it.
 *
 * <p>Parsing happens in the constructor. Nested elements are parsed by a child
 * {@code UglierSoup} that shares the same {@link CharBuffer}, so the buffer's
 * position is the single read cursor for the whole recursion.
 */
public class UglierSoup {
    public static void main(String[] args) {
        // var in = new In("./test.html");
        // var content = in.readAll();
        var content = "\n" + //
                "\t\t\t\t<tr>\n" + //
                "\t\t\t\t\t<td class=\"nearby-observations-temperature red\">\n" + //
                "\t\t\t\t\t\t9.4&deg;\n" + //
                "\t\t\t\t\t</td>\n" + //
                "\t\t\t\t\t<td class=\"nobr\">\n" + //
                "\t\t\t\t\t\t<a href=\"/weather-station/askov\">Askov</a>\n" + //
                "\t\t\t\t\t\t<br>\n" + //
                "\t\t\t\t\t\t<span class=\"observation_ago\">\n" + //
                "\t\t\t\t\t\tFor 70 minutter siden, 103 km\n" + //
                "\t\t\t\t\t\t<!--\n" + //
                "\t\t\t\t\t\t åt \n" + //
                "\t\t\t\t\t\t-->\n" + //
                "\t\t\t\t\t\t</span>\n" + //
                "\t\t\t\t\t\t\n" + //
                "\t\t\t\t\t</td>\n" + //
                "\n" + //
                "\t\t\t\t\t<td class=\"nobr\">\n" + //
                "\t\t\t\t\t\t\t3 m/s\n" + //
                "\t\t\t\t\t\t\t\n" + //
                "\t\t\t\t\t</td>\t\t\t\t\t\t\t\n" + //
                "\t\t\t\t\t\n" + //
                "\t\t\t\t\t<td>\n" + //
                "\t\t\t\t\t\t\t<img alt=\"wind arrow\" class=\"\" src=\"//static.flotvejr.dk/images/arrows/blue/15/270.png\" />\n" + //
                "\t\t\t\t\t</td>\n" + //
                "\n" + //
                "\t\t\t\t</tr>";
        var parser = new UglierSoup(content);
        System.out.println(parser.data.toString());
    }

    /** Input being parsed; its position is the read cursor. */
    final CharBuffer content;
    /** Nodes parsed at this nesting level, in document order. */
    final ArrayList<HtmlItem> data;

    /**
     * Parses a whole document.
     *
     * @throws IllegalArgumentException on a malformed closing tag or an unterminated script element
     */
    UglierSoup(String content) {
        this.content = CharBuffer.wrap(content);
        this.data = loop();
    }

    /**
     * Parses from the buffer's current position up to the next closing tag or the
     * end of input, leaving the buffer positioned after what was consumed.
     *
     * @throws IllegalArgumentException on a malformed closing tag or an unterminated script element
     */
    UglierSoup(CharBuffer content) {
        this.content = content;
        this.data = loop();
    }

    /**
     * Finds the tags matching {@code selector} anywhere in the parsed document.
     *
     * @param selector a selector chain as understood by {@link HtmlSelector}
     * @return the matches in document order; empty when nothing matches
     */
    public HtmlTag[] getElements(String selector) {
        var results = new ArrayList<HtmlTag>();
        getElements(new HtmlSelector(selector), results);
        return results.toArray(new HtmlTag[0]);
    }

    /** Runs the selector against every top-level tag, collecting matches into {@code results}. */
    void getElements(HtmlSelector selector, ArrayList<HtmlTag> results) {
        for (var kid : data) {
            if (kid.type != HtmlItem.HtmlItemType.Tag) {
                continue;
            }
            kid.getTag().getElements(selector, results);
        }
    }

    // script should also have separate handling to completely skip contents
    // like with comments, as HTML can be present within script tags
    final static String[] selfClosingTags = { "!doctype", "link", "meta", "img", "br", "script" };
    final static Pattern whitespace = Pattern.compile("^\\s+");
    final static Pattern comment = Pattern.compile("^<!--[\\s\\S]*?-->");
    final static Pattern identifier = Pattern.compile("^[!a-zA-Z0-9_][a-zA-Z0-9_-]*");
    final static Pattern paramContents = Pattern.compile("^(\"[^\"]*\"|'[^']*'|[a-zA-Z0-9_][a-zA-Z0-9_-]*)");
    final static Pattern textContent = Pattern.compile("^[^<]+");
    // tag names are case-insensitive, so </SCRIPT> must end a script element too
    final static Pattern scriptEnd = Pattern.compile("</script\\s*>", Pattern.CASE_INSENSITIVE);
    final static Pattern htmlEntities = Pattern.compile("&([a-zA-Z0-9_-]+);");

    /** Tells whether {@code name} (compared ignoring case) is a tag that never has children. */
    boolean shouldSkipKids(String name) {
        for (var item : selfClosingTags) {
            // equalsIgnoreCase avoids the locale-dependent behaviour of toLowerCase()
            if (item.equalsIgnoreCase(name)) {
                return true;
            }
        }
        return false;
    }

    /** Moves the read cursor one character forward. */
    void advance() {
        advance(1);
    }

    /** Moves the read cursor {@code num} characters forward. */
    void advance(int num) {
        content.position(content.position() + num);
    }

    /** Returns the character under the cursor, or {@code '\0'} at the end of input. */
    private char peek() {
        return content.hasRemaining() ? content.charAt(0) : '\0';
    }

    /** Consumes {@code pat} if it matches at the cursor; otherwise does nothing. */
    void skipPattern(Pattern pat) {
        var matcher = pat.matcher(content);
        // lookingAt() only tries the cursor position; find() on a ^-anchored pattern
        // gives the same answer but walks the rest of the buffer on every miss
        if (matcher.lookingAt()) {
            advance(matcher.end());
        }
    }

    /**
     * Strips one pair of surrounding quotes (double or single) from an attribute
     * value and decodes the HTML entities in it.
     */
    String cleanupParamValue(String value) {
        if (value.length() >= 2) {
            var quote = value.charAt(0);
            if ((quote == '"' || quote == '\'') && value.charAt(value.length() - 1) == quote) {
                value = value.substring(1, value.length() - 1);
            }
        }
        return handleHtmlEntities(value);
    }

    /**
     * Replaces the named entities this parser knows ({@code deg} and the five
     * predefined XML entities) with their characters; names are matched ignoring
     * case and unknown entities are left untouched.
     */
    String handleHtmlEntities(String value) {
        return htmlEntities.matcher(value).replaceAll((MatchResult res) -> {
            var name = res.group(1).toLowerCase(java.util.Locale.ROOT);
            return switch (name) {
                case "deg" -> "°";
                case "amp" -> "&";
                case "lt" -> "<";
                case "gt" -> ">";
                case "quot" -> "\"";
                case "apos" -> "'";
                default -> res.group();
            };
        });
    }

    /**
     * Consumes text up to the next {@code '<'} (or the end of input).
     *
     * @return the text with entities decoded, or {@code null} when there is no text at the cursor
     */
    String parseText() {
        var matcher = textContent.matcher(content);
        if (!matcher.lookingAt()) {
            return null;
        }
        // read the match before moving the cursor: the matcher looks at the live buffer
        var text = matcher.group();
        advance(matcher.end());
        return handleHtmlEntities(text);
    }

    /**
     * Parses an opening tag with its attributes and, unless the tag is childless,
     * its children up to and including the next closing tag.
     *
     * @return the parsed tag, or {@code null} when no tag starts at the cursor. If a
     *         {@code '<'} is not followed by a tag name, the {@code '<'} has already
     *         been consumed when {@code null} is returned.
     * @throws IllegalArgumentException when a script element has no closing tag
     */
    HtmlTag parseTag() {
        if (peek() != '<') {
            return null;
        }
        advance();
        skipPattern(whitespace);
        var nameMatcher = identifier.matcher(content);
        if (!nameMatcher.lookingAt()) {
            return null;
        }
        var name = nameMatcher.group();
        advance(nameMatcher.end());
        var params = new HashMap<String, String>();
        while (true) {
            skipPattern(whitespace);
            if (peek() == '/') {
                advance();
            }
            if (peek() == '>') {
                break;
            }
            var identMatcher = identifier.matcher(content);
            if (!identMatcher.lookingAt()) {
                // end of input or a character that cannot start an attribute name
                break;
            }
            var paramName = identMatcher.group();
            advance(identMatcher.end());
            var paramContent = "";
            if (peek() == '=') {
                advance();
                var contentMatcher = paramContents.matcher(content);
                if (contentMatcher.lookingAt()) {
                    paramContent = cleanupParamValue(contentMatcher.group());
                    advance(contentMatcher.end());
                }
            }
            params.put(paramName, paramContent);
        }
        // consume the '>' (or the offending character); nothing to consume at end of input
        if (content.hasRemaining()) {
            advance();
        }
        ArrayList<HtmlItem> kids = null;
        // shouldSkipKids completely omits content including ending tags
        // for script we need to skip content until ending tag.
        // use regex for quick lookahead.
        if (name.equalsIgnoreCase("script")) {
            var scriptMatcher = scriptEnd.matcher(content);
            if (!scriptMatcher.find()) {
                throw new IllegalArgumentException("Script doesn't have end tag?");
            }
            advance(scriptMatcher.end());
        } else if (!shouldSkipKids(name)) {
            // the child parser shares this buffer, so the cursor ends up after the closing tag
            var kidParser = new UglierSoup(content);
            kids = kidParser.data;
        }
        return new HtmlTag(name, params, kids == null ? new HtmlItem[0] : kids.toArray(new HtmlItem[0]));
    }

    // this is wrong behaviour
    // ideally this should check the closing element's name and otherwise go up the chain
    // so that it 'heals' if an element is not closed before it's parent is
    // this completely ignores the name of an element and just closes the current one
    // and depends on valid HTML being submitted (with only the few self closing tags defined above)
    /**
     * Consumes a closing tag if one starts at the cursor.
     *
     * @return {@code true} when a closing tag was consumed, {@code false} when the cursor is elsewhere
     * @throws IllegalArgumentException when {@code "</"} is not followed by a name and {@code '>'}
     */
    boolean detectClosingTag() {
        if (content.remaining() < 2 || content.charAt(0) != '<' || content.charAt(1) != '/') {
            return false;
        }
        advance(2);
        var identMatcher = identifier.matcher(content);
        if (!identMatcher.lookingAt()) {
            throw new IllegalArgumentException("Ending tag without tag name?");
        }
        advance(identMatcher.end());
        skipPattern(whitespace);
        if (peek() != '>') {
            throw new IllegalArgumentException("Ending tag without > after whitespace");
        }
        advance();
        return true;
    }

    /** Skips any mix of whitespace and comments at the cursor. */
    private void skipIgnorable() {
        int before;
        do {
            before = content.position();
            skipPattern(whitespace);
            skipPattern(comment);
        } while (content.position() != before);
    }

    /**
     * Parses sibling nodes until the end of input, a closing tag (which is
     * consumed), or something that is neither a tag nor text.
     *
     * @return the parsed nodes in document order
     */
    ArrayList<HtmlItem> loop() {
        var data = new ArrayList<HtmlItem>();
        while (true) {
            skipIgnorable();
            if (!content.hasRemaining()) {
                break;
            }
            if (detectClosingTag()) {
                break;
            }
            var tag = parseTag();
            if (tag != null) {
                data.add(HtmlItem.tag(tag));
                continue;
            }
            var text = parseText();
            if (text == null) {
                break;
            }
            data.add(HtmlItem.text(text));
        }
        return data;
    }
}

46
week9/Weather.java Normal file
View file

@ -0,0 +1,46 @@
import java.io.FileDescriptor;
import java.io.FileOutputStream;
import java.io.PrintStream;
import common.In;
/**
 * Command-line tool that prints a table of nearby weather observations
 * (temperature, station, wind speed, age and distance) for a city, scraped
 * from the city's observations page on flotvejr.dk.
 */
public class Weather {
    /**
     * @param args exactly one element: the city name as it appears in the site's URL
     */
    public static void main(String[] args) {
        // this wasn't needed on my main NixOS system but was needed on SteamOS
        // without this it just printed ? for utf-8 characters
        // (the Charset overload throws no checked exception, so nothing has to be swallowed)
        System.setOut(new PrintStream(new FileOutputStream(FileDescriptor.out), true,
                java.nio.charset.StandardCharsets.UTF_8));
        if (args.length != 1) {
            System.out.println("Missing argument for city name");
            return;
        }
        String url = String.format("https://www.flotvejr.dk/%s/observations", args[0]);
        // String url = "./test.html";
        // NOTE(review): In is a project class; presumably it fetches the URL — confirm
        String source = new In(url).readAll();
        UglierSoup soup = new UglierSoup(source);
        System.out.println("╭────────────┬─────────────────────────────┬────────────╮");
        var isFirst = true;
        for (var row : soup.getElements(".nearby-observations-table tr")) {
            var temperatures = row.getElements(".nearby-observations-temperature");
            var links = row.getElements(".nobr a");
            var agos = row.getElements(".observation_ago");
            var nobrs = row.getElements(".nobr");
            if (temperatures.length == 0 || links.length == 0 || agos.length == 0 || nobrs.length == 0) {
                // not a data row (e.g. a header or spacer row): skip instead of crashing
                continue;
            }
            // the text is expected to look like "For 70 minutter siden, 103 km":
            // word 1 is the age in minutes and word 4 the distance in km
            var observation = agos[0].getInnerText().trim().split(" ");
            if (observation.length < 5) {
                continue;
            }
            if (isFirst) {
                isFirst = false;
            } else {
                System.out.println("├────────────┼─────────────────────────────┼────────────┤");
            }
            var temperature = temperatures[0].getInnerText().trim();
            var location = links[0].getInnerText();
            var time = observation[1] + " mins ago";
            var place = observation[4] + " km away";
            // the wind speed sits in the last "nobr" cell of the row
            var windspeed = nobrs[nobrs.length - 1].getInnerText().trim();
            System.out.printf(
                    "│ %-10s │ %-27s │ %-10s │\n│ │ %-12s - %-12s │ │\n",
                    temperature, location, windspeed, time, place);
        }
        System.out.println("╰────────────┴─────────────────────────────┴────────────╯");
    }
}

View file

@ -15,6 +15,79 @@ your ZIP code will give you a weather forecast.
- You may use city names instead of zip code
- Note: Do not overcommit; we are expecting something simple.
#embedClass(name: "Weather")
Note: The other classes used are omitted from the PDF for clarity; see the ZIP or git source. They include a simple HTML parser and selector queries by tag name and class.
Example output:
#[
#show raw.where(block: true): set par(leading: 2pt)
```
╭────────────┬─────────────────────────────┬────────────╮
│ 5.8° │ Oedum │ 1 m/s │
│ │ 91 mins ago - 16 km away │ │
├────────────┼─────────────────────────────┼────────────┤
│ 9.1° │ Sletterhage Fyr │ 7 m/s │
│ │ 91 mins ago - 20 km away │ │
├────────────┼─────────────────────────────┼────────────┤
│ 4.7° │ Tirstrup │ 2 m/s │
│ │ 31 mins ago - 30 km away │ │
├────────────┼─────────────────────────────┼────────────┤
│ 4.7° │ Horsens/Bygholm │ 3 m/s │
│ │ 91 mins ago - 42 km away │ │
├────────────┼─────────────────────────────┼────────────┤
│ 5.0° │ Hald V │ 2 m/s │
│ │ 91 mins ago - 46 km away │ │
├────────────┼─────────────────────────────┼────────────┤
│ 9.8° │ Roesnaes │ 9 m/s │
│ │ 91 mins ago - 61 km away │ │
├────────────┼─────────────────────────────┼────────────┤
│ 4.9° │ Isenvad │ 2 m/s │
│ │ 91 mins ago - 64 km away │ │
├────────────┼─────────────────────────────┼────────────┤
│ 9.2° │ Gniben │ 9 m/s │
│ │ 91 mins ago - 69 km away │ │
├────────────┼─────────────────────────────┼────────────┤
│ 5.0° │ Karup │ 2 m/s │
│ │ 31 mins ago - 69 km away │ │
├────────────┼─────────────────────────────┼────────────┤
│ 7.1° │ Odense / Beldringe │ 3 m/s │
│ │ 31 mins ago - 76 km away │ │
├────────────┼─────────────────────────────┼────────────┤
│ 4.8° │ Aars Syd │ 0 m/s │
│ │ 91 mins ago - 79 km away │ │
├────────────┼─────────────────────────────┼────────────┤
│ 5.6° │ Billund Lufthavn │ 1 m/s │
│ │ 41 mins ago - 81 km away │ │
├────────────┼─────────────────────────────┼────────────┤
│ 8.6° │ Aarslev │ 3 m/s │
│ │ 91 mins ago - 95 km away │ │
├────────────┼─────────────────────────────┼────────────┤
│ 6.4° │ Vamdrup │ 3 m/s │
│ │ 31 mins ago - 98 km away │ │
├────────────┼─────────────────────────────┼────────────┤
│ 6.5° │ Mejrup │ 2 m/s │
│ │ 91 mins ago - 99 km away │ │
├────────────┼─────────────────────────────┼────────────┤
│ 8.1° │ Borris │ 2 m/s │
│ │ 91 mins ago - 100 km away │ │
├────────────┼─────────────────────────────┼────────────┤
│ 7.8° │ Askov │ 2 m/s │
│ │ 91 mins ago - 103 km away │ │
├────────────┼─────────────────────────────┼────────────┤
│ 9.8° │ Assens/Toroe │ 4 m/s │
│ │ 91 mins ago - 103 km away │ │
├────────────┼─────────────────────────────┼────────────┤
│ 8.7° │ Holbaek │ 5 m/s │
│ │ 91 mins ago - 106 km away │ │
├────────────┼─────────────────────────────┼────────────┤
│ 5.4° │ Aalborg │ 3 m/s │
│ │ 31 mins ago - 107 km away │ │
╰────────────┴─────────────────────────────┴────────────╯
```
]
== Exercise 9.1
Write a class `Person` to represent a person. The class should have the following fields: `String firstName`, `String lastName`, `int age`, and `Person spouse`. The `spouse` field is initially `null`.