Browse code

use okhttp

devnewton authored on 23/06/2017 13:16:06
Showing 3 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1 @@
1
+/output/
0 2
\ No newline at end of file
... ...
@@ -12,9 +12,9 @@
12 12
     </properties>
13 13
     <dependencies>
14 14
         <dependency>
15
-            <groupId>io.parallec</groupId>
16
-            <artifactId>parallec-core</artifactId>
17
-            <version>0.10.5</version>
15
+            <groupId>com.squareup.okhttp3</groupId>
16
+            <artifactId>okhttp</artifactId>
17
+            <version>3.8.1</version>
18 18
         </dependency>
19 19
         <dependency>
20 20
             <groupId>org.apache.commons</groupId>
... ...
@@ -26,5 +26,10 @@
26 26
             <artifactId>commons-lang3</artifactId>
27 27
             <version>3.5</version>
28 28
         </dependency>
29
+        <dependency>
30
+            <groupId>org.apache.commons</groupId>
31
+            <artifactId>commons-io</artifactId>
32
+            <version>1.3.2</version>
33
+        </dependency>
29 34
     </dependencies>
30 35
 </project>
31 36
\ No newline at end of file
... ...
@@ -1,20 +1,14 @@
1 1
 package im.bci.fta2tsv;
2 2
 
3
-import io.parallec.core.ParallecHeader;
4
-import io.parallec.core.ParallecResponseHandler;
5
-import io.parallec.core.ParallelClient;
6
-import io.parallec.core.ResponseOnSingleTask;
7 3
 import java.io.File;
8 4
 import java.io.IOException;
9
-import java.nio.charset.Charset;
10 5
 import java.time.LocalDate;
11
-import java.time.format.DateTimeFormatter;
12
-import java.util.ArrayList;
13
-import java.util.Map;
14
-import java.util.logging.Level;
15
-import java.util.logging.Logger;
6
+import okhttp3.Call;
7
+import okhttp3.Callback;
8
+import okhttp3.OkHttpClient;
9
+import okhttp3.Request;
10
+import okhttp3.Response;
16 11
 import org.apache.commons.io.FileUtils;
17
-import org.apache.commons.io.IOUtils;
18 12
 import org.apache.commons.lang3.StringUtils;
19 13
 
20 14
 /**
... ...
@@ -24,86 +18,49 @@ import org.apache.commons.lang3.StringUtils;
24 18
 public class Fta2Tsv {
25 19
 
26 20
     private static final String[] TRIBUNES = {"batavie", "dlfp", "euromussels", "finss", "eurofaab", "old-dlfp"};
27
-    private static final LocalDate START_DATE = LocalDate.of(2017, 6, 20);
28
-    private static final LocalDate END_DATE = LocalDate.now().plusDays(1);
21
+    private static final LocalDate START_DATE = LocalDate.of(2006, 12, 01);
22
+    private static final LocalDate END_DATE = LocalDate.now();
23
+    private static final File OUTPUT_DIR = new File("output");
29 24
 
30
-    public static void main(String[] args) {
31
-        ArrayList<String> requests = new ArrayList<>();
32
-        for (String tribune : TRIBUNES) {
33
-            for (LocalDate date = START_DATE; date.isBefore(END_DATE); date = date.plusDays(1)) {
34
-                requests.add(tribune + "/" + date.toString());
35
-            }
36
-        }
37
-        ParallelClient pc = new ParallelClient();
38
-        pc.prepareHttpGet("/t/$REQ")
39
-                .setHttpHeaders(new ParallecHeader().addPair("Accept", "text/tab-separated-values"))
40
-                .setReplaceVarMapToSingleTargetSingleVar("REQ", requests, "bombefourchette.com")
41
-                .execute(new ParallecResponseHandler() {
42
-                    @Override
43
-                    public void onCompleted(ResponseOnSingleTask res, Map<String, Object> responseContext) {
44
-                        String body = res.getResponseContent();
45
-                        if(StringUtils.isNotBlank(body)) {
46
-                            try {
47
-                                String filename = StringUtils.removeStart(res.getRequest().getResourcePath(), "/t/");
48
-                                filename = StringUtils.replace(filename, "/", "_") + ".tsv";
49
-                                FileUtils.write(new File(filename), body, "UTF-8");
50
-                            } catch (IOException ex) {
51
-                                Logger.getLogger(Fta2Tsv.class.getName()).log(Level.SEVERE, null, ex);
52
-                            }
25
+    public static void main(String[] args) throws InterruptedException {
26
+        OkHttpClient client = new OkHttpClient();
27
+        for (final String tribune : TRIBUNES) {
28
+            File outputDir = new File(OUTPUT_DIR, tribune);
29
+            outputDir.mkdirs();
30
+            for (LocalDate dateIterator = END_DATE; dateIterator.isAfter(START_DATE); dateIterator = dateIterator.minusDays(1)) {
31
+                final LocalDate date = dateIterator;
32
+                final File outputFile = new File(outputDir, date + ".tsv");
33
+                if (outputFile.length() <= 0) {
34
+                    Request request = new Request.Builder()
35
+                            .url("http://bombefourchette.com/t/" + tribune + "/" + date)
36
+                            .header("Accept", "text/tab-separated-values")
37
+                            .build();
38
+                    client.newCall(request).enqueue(new Callback() {
39
+                        @Override
40
+                        public void onFailure(Call call, IOException ioe) {
41
+                            System.err.println("Cannot retrieve " + tribune + " at " + date + " :");
42
+                            System.err.println(ioe);
53 43
                         }
54
-                        System.out.println(body);
55
-                    }
56
-                });
57
-
58
-    }
59
-
60
-    private static final DateTimeFormatter POST_DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern("yyyyMMddhhmmss");
61
-    private static final DateTimeFormatter POST_TIME_FORMATTER = DateTimeFormatter.ofPattern("HH:mm:ss");/*
62
-	private static final Whitelist MESSAGE_WHITELIST = Whitelist.none().addTags("b", "i", "s", "u", "tt", "code", "spoiler");
63 44
 
64
-    
65
-    private static void retrieve(String tribune, LocalDate date) {
66
-        String url = "http://bombefourchette.com/t/" + tribune + "/" + date.toString();
67
-        Element debugPost = null;
68
-        try (FileWriter fw = new FileWriter(tribune + "-" + date.toString() + ".tsv")) {
69
-            Document doc = Jsoup.connect(url).get();
70
-            final CSVPrinter printer = CSVFormat.TDF.print(fw);
71
-            for (Element post : doc.select("#posts > li")) {
72
-                debugPost = post;
73
-                Element firstA = post.select("a:first-child").get(0);
74
-                printer.print(firstA.attr("id"));
75
-                LocalDateTime dateTime = LocalDateTime.of(date, LocalTime.parse(firstA.select(".horloge").text(), POST_TIME_FORMATTER));
76
-                printer.print(dateTime.format(POST_DATE_TIME_FORMATTER));
77
-                String info = "";
78
-                for (Element infoElement : firstA.select(".info")) {
79
-                    info = StringUtils.removeEnd(infoElement.text(), ">");
80
-                    infoElement.remove();
81
-                }
82
-                String login = "";
83
-                for (Element loginElement : firstA.select(".login")) {
84
-                    login = StringUtils.removeEnd(loginElement.text(), ">");
85
-                    loginElement.remove();
86
-                }
87
-                printer.print(info);
88
-                printer.print(login);
89
-                firstA.replaceWith(TextNode.createFromEncoded(firstA.html(), null));
90
-
91
-                for (Element element : doc.body().children().select(":not(a,b,i,s,u,tt,code,spoiler)")) {
92
-                    element.replaceWith(TextNode.createFromEncoded(element.toString(), null));
93
-                }
94
-                for (Element a : post.select("a")) {
95
-                    a.replaceWith(TextNode.createFromEncoded(a.attr("href"), null));
45
+                        @Override
46
+                        public void onResponse(Call call, Response res) throws IOException {
47
+                            if (res.isSuccessful()) {
48
+                                String body = res.body().string();
49
+                                if (StringUtils.isNotBlank(body)) {
50
+                                    try {
51
+                                        FileUtils.writeStringToFile(outputFile, body, "UTF-8");
52
+                                        System.out.println("Retrieved " + tribune + " at " + date);
53
+                                    } catch (Exception ex) {
54
+                                        System.err.println("Cannot retrieve " + tribune + " at " + date + " :");
55
+                                        System.err.println(ex);
56
+                                    }
57
+                                }
58
+                            }
59
+                        }
60
+                    });
96 61
                 }
97
-                		Cleaner cleaner = new Cleaner(MESSAGE_WHITELIST);
98
-		String message = cleaner.clean(Jsoup.parse(post.html())).html();
99
-
100
-                printer.print(message);
101
-                printer.println();
102 62
             }
103
-        } catch (Exception e) {
104
-            System.err.println("Cannot retrieve " + url);
105
-            System.err.println(debugPost);
106
-            System.err.println(e);
107 63
         }
108
-    }*/
64
+    }
65
+
109 66
 }