Commit f41dcda1 authored by ElaFHNW
Browse files

crawler and http

parent 6759cb8c
package web;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
/**
 * Small depth-limited recursive web crawler built on jsoup.
 *
 * <p>Starting from {@link #START_URL}, each successfully fetched page has its
 * URL and title printed to standard output, and every {@code a[href]} link on
 * it is followed one level deeper, up to {@link #MAX_LEVEL}.
 */
public class Crawler {

    /** Deepest level that is still fetched; the start page counts as level 1. */
    private static final int MAX_LEVEL = 5;

    /** Page the crawl starts from. */
    private static final String START_URL = "https://en.wikipedia.org/";

    /**
     * Program entry point: crawls from {@link #START_URL} with an empty visited set.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        crawl(1, START_URL, new HashSet<String>());
    }

    /**
     * Fetches {@code url} and recursively follows the links found on it.
     *
     * @param level   depth of {@code url}; nothing is fetched beyond {@link #MAX_LEVEL}
     * @param url     absolute URL of the page to fetch
     * @param visited URLs that have already been attempted; updated in place
     */
    private static void crawl(int level, String url, Set<String> visited) {
        if (level > MAX_LEVEL || !isHttpUrl(url)) {
            return;
        }
        // Record the URL before fetching so that a failing page is attempted
        // only once; add() returns false when the URL was already present.
        if (!visited.add(url)) {
            return;
        }
        Document doc = request(url, level);
        if (doc == null) {
            return;
        }
        for (Element link : doc.select("a[href]")) {
            String nextLink = link.absUrl("href");
            if (!visited.contains(nextLink)) {
                // level + 1 (not level++): every link on this page sits exactly
                // one level below it, and the depth limit must actually grow
                // along the recursion.
                crawl(level + 1, nextLink, visited);
            }
        }
    }

    /**
     * Tells whether {@code url} uses the http or https scheme. This filters out
     * the empty string and schemes such as {@code mailto:} before a connection
     * is attempted.
     *
     * @param url candidate URL, may be {@code null}
     * @return {@code true} if {@code url} starts with {@code http://} or {@code https://}
     */
    private static boolean isHttpUrl(String url) {
        return url != null
                && (url.regionMatches(true, 0, "http://", 0, 7)
                        || url.regionMatches(true, 0, "https://", 0, 8));
    }

    /**
     * Downloads and parses a single page, printing its URL and title on success.
     *
     * @param url   absolute URL to fetch
     * @param level depth of the page, used only for the printed output
     * @return the parsed document when the response status is 200; {@code null}
     *         when the status differs or the request/parsing throws
     */
    private static Document request(String url, int level) {
        try {
            Connection con = Jsoup.connect(url);
            Document doc = con.get();
            if (con.response().statusCode() == 200) {
                System.out.println("Link: " + " level " + level + " " + url);
                System.out.println(doc.title());
                return doc;
            }
        } catch (Exception e) {
            // Best effort: one unreachable or malformed page must not stop the
            // crawl, but the failure is reported instead of vanishing silently.
            System.err.println("Skipping " + url + ": " + e);
        }
        return null;
    }
}
package web;
// https://www.youtube.com/watch?v=wrFXBV4MwvI
import org.jsoup.*;
import org.jsoup.nodes.Document;
/**
 * Tiny jsoup demo: downloads http://example.com and prints every anchor
 * element found on the page to standard output.
 */
public class Web {

    /**
     * Fetches the page and prints each {@code <a>} element on its own line.
     * If anything goes wrong, the exception message is printed instead.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            Document page = Jsoup.connect("http://example.com").get();
            // Iterate explicitly; println(Object) renders each element exactly
            // as the method-reference form does.
            for (Object anchor : page.select("a")) {
                System.out.println(anchor);
            }
        } catch (Exception failure) {
            String message = failure.getMessage();
            System.out.println(message);
        }
    }
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment