Browse Source

I used jsoup for this project. The txt files are stored in the results directory.

Nicholas Maidanos 6 years ago
parent
commit
8e4ae45f4c
11 changed files with 6206 additions and 0 deletions
  1. 1
    0
      .idea/ZCW-Fido-URLFetch.iml
  2. 1
    0
      .idea/misc.xml
  3. 6
    0
      pom.xml
  4. 428
    0
      results/facebook.txt
  5. 732
    0
      results/google.txt
  6. 406
    0
      results/news.txt
  7. 3494
    0
      results/nyc.txt
  8. 1115
    0
      results/zipcode.txt
  9. BIN
      src/libs/jsoup-1.11.3.jar
  10. 23
    0
      src/main/java/Main.java
  11. BIN
      target/classes/Main.class

+ 1
- 0
.idea/ZCW-Fido-URLFetch.iml View File

@@ -11,5 +11,6 @@
11 11
     </content>
12 12
     <orderEntry type="inheritedJdk" />
13 13
     <orderEntry type="sourceFolder" forTests="false" />
14
+    <orderEntry type="library" name="jsoup-1.11.3" level="project" />
14 15
   </component>
15 16
 </module>

+ 1
- 0
.idea/misc.xml View File

@@ -7,4 +7,5 @@
7 7
       </list>
8 8
     </option>
9 9
   </component>
10
+  <component name="ProjectRootManager" version="2" project-jdk-name="1.8" project-jdk-type="JavaSDK" />
10 11
 </project>

+ 6
- 0
pom.xml View File

@@ -8,5 +8,11 @@
8 8
     <artifactId>ZCW-Fido-URLFetch</artifactId>
9 9
     <version>1.0-SNAPSHOT</version>
10 10
 
11
+    <dependency>
12
+        <groupId>org.jsoup</groupId>
13
+        <artifactId>jsoup</artifactId>
14
+        <version>1.11.3</version>
15
+    </dependency>
16
+
11 17
     
12 18
 </project>

+ 428
- 0
results/facebook.txt
File diff suppressed because it is too large
View File


+ 732
- 0
results/google.txt
File diff suppressed because it is too large
View File


+ 406
- 0
results/news.txt View File

@@ -0,0 +1,406 @@
1
+<html op="news">
2
+ <head>
3
+  <meta name="referrer" content="origin">
4
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
5
+  <link rel="stylesheet" type="text/css" href="news.css?EfHtUiUs331KRNNSbYDV"> 
6
+  <link rel="shortcut icon" href="favicon.ico"> 
7
+  <link rel="alternate" type="application/rss+xml" title="RSS" href="rss"> 
8
+  <title>Hacker News</title>
9
+ </head>
10
+ <body>
11
+  <center>
12
+   <table id="hnmain" border="0" cellpadding="0" cellspacing="0" width="85%" bgcolor="#f6f6ef"> 
13
+    <tbody>
14
+     <tr>
15
+      <td bgcolor="#ff6600">
16
+       <table border="0" cellpadding="0" cellspacing="0" width="100%" style="padding:2px">
17
+        <tbody>
18
+         <tr>
19
+          <td style="width:18px;padding-right:4px"><a href="https://news.ycombinator.com"><img src="y18.gif" width="18" height="18" style="border:1px white solid;"></a></td> 
20
+          <td style="line-height:12pt; height:10px;"><span class="pagetop"><b class="hnname"><a href="news">Hacker News</a></b> <a href="newest">new</a> | <a href="newcomments">comments</a> | <a href="show">show</a> | <a href="ask">ask</a> | <a href="jobs">jobs</a> | <a href="submit">submit</a> </span></td>
21
+          <td style="text-align:right;padding-right:4px;"><span class="pagetop"> <a href="login?goto=news">login</a> </span></td> 
22
+         </tr>
23
+        </tbody>
24
+       </table></td>
25
+     </tr> 
26
+     <tr style="height:10px"></tr>
27
+     <tr>
28
+      <td>
29
+       <table border="0" cellpadding="0" cellspacing="0" class="itemlist"> 
30
+        <tbody>
31
+         <tr class="athing" id="17259082"> 
32
+          <td align="right" valign="top" class="title"><span class="rank">1.</span></td> 
33
+          <td valign="top" class="votelinks">
34
+           <center>
35
+            <a id="up_17259082" href="vote?id=17259082&amp;how=up&amp;goto=news">
36
+             <div class="votearrow" title="upvote"></div></a>
37
+           </center></td>
38
+          <td class="title"><a href="https://blog.google/topics/ai/ai-principles/" class="storylink">AI at Google: our principles</a><span class="sitebit comhead"> (<a href="from?site=blog.google"><span class="sitestr">blog.google</span></a>)</span></td>
39
+         </tr>
40
+         <tr>
41
+          <td colspan="2"></td>
42
+          <td class="subtext"> <span class="score" id="score_17259082">322 points</span> by <a href="user?id=dannyrosen" class="hnuser">dannyrosen</a> <span class="age"><a href="item?id=17259082">4 hours ago</a></span> <span id="unv_17259082"></span> | <a href="hide?id=17259082&amp;goto=news">hide</a> | <a href="item?id=17259082">240&nbsp;comments</a> </td>
43
+         </tr> 
44
+         <tr class="spacer" style="height:5px"></tr> 
45
+         <tr class="athing" id="17259928"> 
46
+          <td align="right" valign="top" class="title"><span class="rank">2.</span></td> 
47
+          <td valign="top" class="votelinks">
48
+           <center>
49
+            <a id="up_17259928" href="vote?id=17259928&amp;how=up&amp;goto=news">
50
+             <div class="votearrow" title="upvote"></div></a>
51
+           </center></td>
52
+          <td class="title"><a href="https://hermitcore.org/2018/06/06/A-Rust-based-Unikernel/" class="storylink">A Rust-Based Unikernel: First Version of a Rust-Based LibOS</a><span class="sitebit comhead"> (<a href="from?site=hermitcore.org"><span class="sitestr">hermitcore.org</span></a>)</span></td>
53
+         </tr>
54
+         <tr>
55
+          <td colspan="2"></td>
56
+          <td class="subtext"> <span class="score" id="score_17259928">125 points</span> by <a href="user?id=ingve" class="hnuser">ingve</a> <span class="age"><a href="item?id=17259928">3 hours ago</a></span> <span id="unv_17259928"></span> | <a href="hide?id=17259928&amp;goto=news">hide</a> | <a href="item?id=17259928">30&nbsp;comments</a> </td>
57
+         </tr> 
58
+         <tr class="spacer" style="height:5px"></tr> 
59
+         <tr class="athing" id="17260500"> 
60
+          <td align="right" valign="top" class="title"><span class="rank">3.</span></td> 
61
+          <td valign="top" class="votelinks">
62
+           <center>
63
+            <a id="up_17260500" href="vote?id=17260500&amp;how=up&amp;goto=news">
64
+             <div class="votearrow" title="upvote"></div></a>
65
+           </center></td>
66
+          <td class="title"><a href="https://www.nature.com/articles/d41586-018-05357-w" class="storylink">Sucking carbon dioxide from air is cheaper than scientists thought</a><span class="sitebit comhead"> (<a href="from?site=nature.com"><span class="sitestr">nature.com</span></a>)</span></td>
67
+         </tr>
68
+         <tr>
69
+          <td colspan="2"></td>
70
+          <td class="subtext"> <span class="score" id="score_17260500">106 points</span> by <a href="user?id=kjeetgill" class="hnuser">kjeetgill</a> <span class="age"><a href="item?id=17260500">2 hours ago</a></span> <span id="unv_17260500"></span> | <a href="hide?id=17260500&amp;goto=news">hide</a> | <a href="item?id=17260500">86&nbsp;comments</a> </td>
71
+         </tr> 
72
+         <tr class="spacer" style="height:5px"></tr> 
73
+         <tr class="athing" id="17257610"> 
74
+          <td align="right" valign="top" class="title"><span class="rank">4.</span></td> 
75
+          <td valign="top" class="votelinks">
76
+           <center>
77
+            <a id="up_17257610" href="vote?id=17257610&amp;how=up&amp;goto=news">
78
+             <div class="votearrow" title="upvote"></div></a>
79
+           </center></td>
80
+          <td class="title"><a href="https://open.nytimes.com/how-the-new-york-times-uses-software-to-recognize-members-of-congress-29b46dd426c7" class="storylink">How The New York Times Uses Software to Recognize Members of Congress</a><span class="sitebit comhead"> (<a href="from?site=nytimes.com"><span class="sitestr">nytimes.com</span></a>)</span></td>
81
+         </tr>
82
+         <tr>
83
+          <td colspan="2"></td>
84
+          <td class="subtext"> <span class="score" id="score_17257610">210 points</span> by <a href="user?id=beriboy" class="hnuser">beriboy</a> <span class="age"><a href="item?id=17257610">6 hours ago</a></span> <span id="unv_17257610"></span> | <a href="hide?id=17257610&amp;goto=news">hide</a> | <a href="item?id=17257610">95&nbsp;comments</a> </td>
85
+         </tr> 
86
+         <tr class="spacer" style="height:5px"></tr> 
87
+         <tr class="athing" id="17261063"> 
88
+          <td align="right" valign="top" class="title"><span class="rank">5.</span></td> 
89
+          <td valign="top" class="votelinks">
90
+           <center>
91
+            <a id="up_17261063" href="vote?id=17261063&amp;how=up&amp;goto=news">
92
+             <div class="votearrow" title="upvote"></div></a>
93
+           </center></td>
94
+          <td class="title"><a href="https://github.com/higgsfield/RL-Adventure-2" class="storylink">Reinforcement Learning: From Zero to State of the Art with Pytorch 4</a><span class="sitebit comhead"> (<a href="from?site=github.com"><span class="sitestr">github.com</span></a>)</span></td>
95
+         </tr>
96
+         <tr>
97
+          <td colspan="2"></td>
98
+          <td class="subtext"> <span class="score" id="score_17261063">11 points</span> by <a href="user?id=codentropy" class="hnuser">codentropy</a> <span class="age"><a href="item?id=17261063">51 minutes ago</a></span> <span id="unv_17261063"></span> | <a href="hide?id=17261063&amp;goto=news">hide</a> | <a href="item?id=17261063">discuss</a> </td>
99
+         </tr> 
100
+         <tr class="spacer" style="height:5px"></tr> 
101
+         <tr class="athing" id="17260911"> 
102
+          <td align="right" valign="top" class="title"><span class="rank">6.</span></td> 
103
+          <td valign="top" class="votelinks">
104
+           <center>
105
+            <a id="up_17260911" href="vote?id=17260911&amp;how=up&amp;goto=news">
106
+             <div class="votearrow" title="upvote"></div></a>
107
+           </center></td>
108
+          <td class="title"><a href="https://www.newyorker.com/books/under-review/the-bullshit-job-boom" class="storylink">The Bullshit-Job Boom</a><span class="sitebit comhead"> (<a href="from?site=newyorker.com"><span class="sitestr">newyorker.com</span></a>)</span></td>
109
+         </tr>
110
+         <tr>
111
+          <td colspan="2"></td>
112
+          <td class="subtext"> <span class="score" id="score_17260911">19 points</span> by <a href="user?id=wyclif" class="hnuser">wyclif</a> <span class="age"><a href="item?id=17260911">1 hour ago</a></span> <span id="unv_17260911"></span> | <a href="hide?id=17260911&amp;goto=news">hide</a> | <a href="item?id=17260911">5&nbsp;comments</a> </td>
113
+         </tr> 
114
+         <tr class="spacer" style="height:5px"></tr> 
115
+         <tr class="athing" id="17260221"> 
116
+          <td align="right" valign="top" class="title"><span class="rank">7.</span></td> 
117
+          <td valign="top" class="votelinks">
118
+           <center>
119
+            <a id="up_17260221" href="vote?id=17260221&amp;how=up&amp;goto=news">
120
+             <div class="votearrow" title="upvote"></div></a>
121
+           </center></td>
122
+          <td class="title"><a href="https://github.com/go-gitea/gitea/issues/4167" class="storylink">Giteabot account was compromised</a><span class="sitebit comhead"> (<a href="from?site=github.com"><span class="sitestr">github.com</span></a>)</span></td>
123
+         </tr>
124
+         <tr>
125
+          <td colspan="2"></td>
126
+          <td class="subtext"> <span class="score" id="score_17260221">85 points</span> by <a href="user?id=sarif" class="hnuser">sarif</a> <span class="age"><a href="item?id=17260221">2 hours ago</a></span> <span id="unv_17260221"></span> | <a href="hide?id=17260221&amp;goto=news">hide</a> | <a href="item?id=17260221">29&nbsp;comments</a> </td>
127
+         </tr> 
128
+         <tr class="spacer" style="height:5px"></tr> 
129
+         <tr class="athing" id="17258275"> 
130
+          <td align="right" valign="top" class="title"><span class="rank">8.</span></td> 
131
+          <td valign="top" class="votelinks">
132
+           <center>
133
+            <a id="up_17258275" href="vote?id=17258275&amp;how=up&amp;goto=news">
134
+             <div class="votearrow" title="upvote"></div></a>
135
+           </center></td>
136
+          <td class="title"><a href="https://blog.github.com/2018-06-07-what-laser-cutting-taught-me-about-contribution-graphs/" class="storylink">Life as a GitHub Intern: What laser cutting taught me about contribution graphs</a><span class="sitebit comhead"> (<a href="from?site=blog.github.com"><span class="sitestr">blog.github.com</span></a>)</span></td>
137
+         </tr>
138
+         <tr>
139
+          <td colspan="2"></td>
140
+          <td class="subtext"> <span class="score" id="score_17258275">62 points</span> by <a href="user?id=brianllamar" class="hnuser">brianllamar</a> <span class="age"><a href="item?id=17258275">5 hours ago</a></span> <span id="unv_17258275"></span> | <a href="hide?id=17258275&amp;goto=news">hide</a> | <a href="item?id=17258275">21&nbsp;comments</a> </td>
141
+         </tr> 
142
+         <tr class="spacer" style="height:5px"></tr> 
143
+         <tr class="athing" id="17257725"> 
144
+          <td align="right" valign="top" class="title"><span class="rank">9.</span></td> 
145
+          <td valign="top" class="votelinks">
146
+           <center>
147
+            <a id="up_17257725" href="vote?id=17257725&amp;how=up&amp;goto=news">
148
+             <div class="votearrow" title="upvote"></div></a>
149
+           </center></td>
150
+          <td class="title"><a href="https://www.nytimes.com/2018/06/07/business/economy/modular-housing.html" class="storylink">Developers are manufacturing prefabricated apartment buildings</a><span class="sitebit comhead"> (<a href="from?site=nytimes.com"><span class="sitestr">nytimes.com</span></a>)</span></td>
151
+         </tr>
152
+         <tr>
153
+          <td colspan="2"></td>
154
+          <td class="subtext"> <span class="score" id="score_17257725">114 points</span> by <a href="user?id=johnny313" class="hnuser">johnny313</a> <span class="age"><a href="item?id=17257725">6 hours ago</a></span> <span id="unv_17257725"></span> | <a href="hide?id=17257725&amp;goto=news">hide</a> | <a href="item?id=17257725">123&nbsp;comments</a> </td>
155
+         </tr> 
156
+         <tr class="spacer" style="height:5px"></tr> 
157
+         <tr class="athing" id="17257239"> 
158
+          <td align="right" valign="top" class="title"><span class="rank">10.</span></td> 
159
+          <td valign="top" class="votelinks">
160
+           <center>
161
+            <a id="up_17257239" href="vote?id=17257239&amp;how=up&amp;goto=news">
162
+             <div class="votearrow" title="upvote"></div></a>
163
+           </center></td>
164
+          <td class="title"><a href="https://arstechnica.com/cars/2018/06/ntsb-autopilot-steered-tesla-car-toward-traffic-barrier-before-deadly-crash/" class="storylink">NTSB: Autopilot steered Tesla car toward traffic barrier before deadly crash</a><span class="sitebit comhead"> (<a href="from?site=arstechnica.com"><span class="sitestr">arstechnica.com</span></a>)</span></td>
165
+         </tr>
166
+         <tr>
167
+          <td colspan="2"></td>
168
+          <td class="subtext"> <span class="score" id="score_17257239">271 points</span> by <a href="user?id=nwrk" class="hnuser">nwrk</a> <span class="age"><a href="item?id=17257239">7 hours ago</a></span> <span id="unv_17257239"></span> | <a href="hide?id=17257239&amp;goto=news">hide</a> | <a href="item?id=17257239">258&nbsp;comments</a> </td>
169
+         </tr> 
170
+         <tr class="spacer" style="height:5px"></tr> 
171
+         <tr class="athing" id="17256923"> 
172
+          <td align="right" valign="top" class="title"><span class="rank">11.</span></td> 
173
+          <td valign="top" class="votelinks">
174
+           <center>
175
+            <a id="up_17256923" href="vote?id=17256923&amp;how=up&amp;goto=news">
176
+             <div class="votearrow" title="upvote"></div></a>
177
+           </center></td>
178
+          <td class="title"><a href="https://sparktoro.com/blog/raised-a-very-unusual-round-of-funding-were-open-sourcing-our-docs/" class="storylink">Raising a Very Unusual Round of Funding and Open-Sourcing Our Docs</a><span class="sitebit comhead"> (<a href="from?site=sparktoro.com"><span class="sitestr">sparktoro.com</span></a>)</span></td>
179
+         </tr>
180
+         <tr>
181
+          <td colspan="2"></td>
182
+          <td class="subtext"> <span class="score" id="score_17256923">96 points</span> by <a href="user?id=rchaudhary" class="hnuser">rchaudhary</a> <span class="age"><a href="item?id=17256923">7 hours ago</a></span> <span id="unv_17256923"></span> | <a href="hide?id=17256923&amp;goto=news">hide</a> | <a href="item?id=17256923">21&nbsp;comments</a> </td>
183
+         </tr> 
184
+         <tr class="spacer" style="height:5px"></tr> 
185
+         <tr class="athing" id="17256443"> 
186
+          <td align="right" valign="top" class="title"><span class="rank">12.</span></td> 
187
+          <td valign="top" class="votelinks">
188
+           <center>
189
+            <a id="up_17256443" href="vote?id=17256443&amp;how=up&amp;goto=news">
190
+             <div class="votearrow" title="upvote"></div></a>
191
+           </center></td>
192
+          <td class="title"><a href="https://www.boxfactura.com/pulltorefresh.js/" class="storylink">Pull to Refresh.js</a><span class="sitebit comhead"> (<a href="from?site=boxfactura.com"><span class="sitestr">boxfactura.com</span></a>)</span></td>
193
+         </tr>
194
+         <tr>
195
+          <td colspan="2"></td>
196
+          <td class="subtext"> <span class="score" id="score_17256443">137 points</span> by <a href="user?id=evo_9" class="hnuser">evo_9</a> <span class="age"><a href="item?id=17256443">8 hours ago</a></span> <span id="unv_17256443"></span> | <a href="hide?id=17256443&amp;goto=news">hide</a> | <a href="item?id=17256443">59&nbsp;comments</a> </td>
197
+         </tr> 
198
+         <tr class="spacer" style="height:5px"></tr> 
199
+         <tr class="athing" id="17258492"> 
200
+          <td align="right" valign="top" class="title"><span class="rank">13.</span></td> 
201
+          <td valign="top" class="votelinks">
202
+           <center>
203
+            <a id="up_17258492" href="vote?id=17258492&amp;how=up&amp;goto=news">
204
+             <div class="votearrow" title="upvote"></div></a>
205
+           </center></td>
206
+          <td class="title"><a href="https://www.nasa.gov/press-release/nasa-finds-ancient-organic-material-mysterious-methane-on-mars" class="storylink">NASA Finds Ancient Organic Material, Mysterious Methane on Mars</a><span class="sitebit comhead"> (<a href="from?site=nasa.gov"><span class="sitestr">nasa.gov</span></a>)</span></td>
207
+         </tr>
208
+         <tr>
209
+          <td colspan="2"></td>
210
+          <td class="subtext"> <span class="score" id="score_17258492">237 points</span> by <a href="user?id=AstralWalker" class="hnuser">AstralWalker</a> <span class="age"><a href="item?id=17258492">5 hours ago</a></span> <span id="unv_17258492"></span> | <a href="hide?id=17258492&amp;goto=news">hide</a> | <a href="item?id=17258492">79&nbsp;comments</a> </td>
211
+         </tr> 
212
+         <tr class="spacer" style="height:5px"></tr> 
213
+         <tr class="athing" id="17251568"> 
214
+          <td align="right" valign="top" class="title"><span class="rank">14.</span></td> 
215
+          <td valign="top" class="votelinks">
216
+           <center>
217
+            <a id="up_17251568" href="vote?id=17251568&amp;how=up&amp;goto=news">
218
+             <div class="votearrow" title="upvote"></div></a>
219
+           </center></td>
220
+          <td class="title"><a href="https://www.the-tls.co.uk/articles/public/passe-presomptif-vercingetorix/" class="storylink">Passé présomptif: on Vercingetorix</a><span class="sitebit comhead"> (<a href="from?site=the-tls.co.uk"><span class="sitestr">the-tls.co.uk</span></a>)</span></td>
221
+         </tr>
222
+         <tr>
223
+          <td colspan="2"></td>
224
+          <td class="subtext"> <span class="score" id="score_17251568">34 points</span> by <a href="user?id=lermontov" class="hnuser">lermontov</a> <span class="age"><a href="item?id=17251568">4 hours ago</a></span> <span id="unv_17251568"></span> | <a href="hide?id=17251568&amp;goto=news">hide</a> | <a href="item?id=17251568">8&nbsp;comments</a> </td>
225
+         </tr> 
226
+         <tr class="spacer" style="height:5px"></tr> 
227
+         <tr class="athing" id="17257482"> 
228
+          <td align="right" valign="top" class="title"><span class="rank">15.</span></td> 
229
+          <td valign="top" class="votelinks">
230
+           <center>
231
+            <a id="up_17257482" href="vote?id=17257482&amp;how=up&amp;goto=news">
232
+             <div class="votearrow" title="upvote"></div></a>
233
+           </center></td>
234
+          <td class="title"><a href="https://www.nowpublishers.com/article/Details/PGL-038" class="storylink">Reconciling Abstraction with High Performance: A MetaOCaml Approach</a><span class="sitebit comhead"> (<a href="from?site=nowpublishers.com"><span class="sitestr">nowpublishers.com</span></a>)</span></td>
235
+         </tr>
236
+         <tr>
237
+          <td colspan="2"></td>
238
+          <td class="subtext"> <span class="score" id="score_17257482">56 points</span> by <a href="user?id=myth_drannon" class="hnuser">myth_drannon</a> <span class="age"><a href="item?id=17257482">7 hours ago</a></span> <span id="unv_17257482"></span> | <a href="hide?id=17257482&amp;goto=news">hide</a> | <a href="item?id=17257482">2&nbsp;comments</a> </td>
239
+         </tr> 
240
+         <tr class="spacer" style="height:5px"></tr> 
241
+         <tr class="athing" id="17260388"> 
242
+          <td align="right" valign="top" class="title"><span class="rank">16.</span></td> 
243
+          <td valign="top" class="votelinks">
244
+           <center>
245
+            <a id="up_17260388" href="vote?id=17260388&amp;how=up&amp;goto=news">
246
+             <div class="votearrow" title="upvote"></div></a>
247
+           </center></td>
248
+          <td class="title"><a href="https://www.functionalgeekery.com/episode-48-matthias-felleisen/" class="storylink">Functional Geekery Episode 48 – Matthias Felleisen [audio]</a><span class="sitebit comhead"> (<a href="from?site=functionalgeekery.com"><span class="sitestr">functionalgeekery.com</span></a>)</span></td>
249
+         </tr>
250
+         <tr>
251
+          <td colspan="2"></td>
252
+          <td class="subtext"> <span class="score" id="score_17260388">11 points</span> by <a href="user?id=TheAsprngHacker" class="hnuser">TheAsprngHacker</a> <span class="age"><a href="item?id=17260388">2 hours ago</a></span> <span id="unv_17260388"></span> | <a href="hide?id=17260388&amp;goto=news">hide</a> | <a href="item?id=17260388">discuss</a> </td>
253
+         </tr> 
254
+         <tr class="spacer" style="height:5px"></tr> 
255
+         <tr class="athing" id="17259063"> 
256
+          <td align="right" valign="top" class="title"><span class="rank">17.</span></td> 
257
+          <td valign="top" class="votelinks">
258
+           <center>
259
+            <a id="up_17259063" href="vote?id=17259063&amp;how=up&amp;goto=news">
260
+             <div class="votearrow" title="upvote"></div></a>
261
+           </center></td>
262
+          <td class="title"><a href="http://money.cnn.com/2018/06/07/technology/facebook-public-post-error/index.html" class="storylink">Facebook bug set 14M users' sharing settings to public</a><span class="sitebit comhead"> (<a href="from?site=cnn.com"><span class="sitestr">cnn.com</span></a>)</span></td>
263
+         </tr>
264
+         <tr>
265
+          <td colspan="2"></td>
266
+          <td class="subtext"> <span class="score" id="score_17259063">135 points</span> by <a href="user?id=uptown" class="hnuser">uptown</a> <span class="age"><a href="item?id=17259063">4 hours ago</a></span> <span id="unv_17259063"></span> | <a href="hide?id=17259063&amp;goto=news">hide</a> | <a href="item?id=17259063">55&nbsp;comments</a> </td>
267
+         </tr> 
268
+         <tr class="spacer" style="height:5px"></tr> 
269
+         <tr class="athing" id="17256709"> 
270
+          <td align="right" valign="top" class="title"><span class="rank">18.</span></td> 
271
+          <td valign="top" class="votelinks">
272
+           <center>
273
+            <a id="up_17256709" href="vote?id=17256709&amp;how=up&amp;goto=news">
274
+             <div class="votearrow" title="upvote"></div></a>
275
+           </center></td>
276
+          <td class="title"><a href="https://wiki.reactivemicro.com/Applesauce" class="storylink">Applesauce – Make exact images of copy-protected Apple II floppy disks</a><span class="sitebit comhead"> (<a href="from?site=reactivemicro.com"><span class="sitestr">reactivemicro.com</span></a>)</span></td>
277
+         </tr>
278
+         <tr>
279
+          <td colspan="2"></td>
280
+          <td class="subtext"> <span class="score" id="score_17256709">67 points</span> by <a href="user?id=timmytokyo" class="hnuser">timmytokyo</a> <span class="age"><a href="item?id=17256709">8 hours ago</a></span> <span id="unv_17256709"></span> | <a href="hide?id=17256709&amp;goto=news">hide</a> | <a href="item?id=17256709">22&nbsp;comments</a> </td>
281
+         </tr> 
282
+         <tr class="spacer" style="height:5px"></tr> 
283
+         <tr class="athing" id="17259594"> 
284
+          <td align="right" valign="top" class="title"><span class="rank">19.</span></td> 
285
+          <td valign="top" class="votelinks">
286
+           <center>
287
+            <a id="up_17259594" href="vote?id=17259594&amp;how=up&amp;goto=news">
288
+             <div class="votearrow" title="upvote"></div></a>
289
+           </center></td>
290
+          <td class="title"><a href="https://www.wsj.com/articles/a-costly-deadly-obsession-with-coal-1528282800" class="storylink">A Costly, Deadly Obsession with Coal</a><span class="sitebit comhead"> (<a href="from?site=wsj.com"><span class="sitestr">wsj.com</span></a>)</span></td>
291
+         </tr>
292
+         <tr>
293
+          <td colspan="2"></td>
294
+          <td class="subtext"> <span class="score" id="score_17259594">81 points</span> by <a href="user?id=JumpCrisscross" class="hnuser">JumpCrisscross</a> <span class="age"><a href="item?id=17259594">3 hours ago</a></span> <span id="unv_17259594"></span> | <a href="hide?id=17259594&amp;goto=news">hide</a> | <a href="item?id=17259594">75&nbsp;comments</a> </td>
295
+         </tr> 
296
+         <tr class="spacer" style="height:5px"></tr> 
297
+         <tr class="athing" id="17256394"> 
298
+          <td align="right" valign="top" class="title"><span class="rank">20.</span></td> 
299
+          <td valign="top" class="votelinks">
300
+           <center>
301
+            <a id="up_17256394" href="vote?id=17256394&amp;how=up&amp;goto=news">
302
+             <div class="votearrow" title="upvote"></div></a>
303
+           </center></td>
304
+          <td class="title"><a href="https://andrewkelley.me/post/full-time-zig.html" class="storylink">I Quit My Job to Live on Donations</a><span class="sitebit comhead"> (<a href="from?site=andrewkelley.me"><span class="sitestr">andrewkelley.me</span></a>)</span></td>
305
+         </tr>
306
+         <tr>
307
+          <td colspan="2"></td>
308
+          <td class="subtext"> <span class="score" id="score_17256394">317 points</span> by <a href="user?id=AndyKelley" class="hnuser">AndyKelley</a> <span class="age"><a href="item?id=17256394">8 hours ago</a></span> <span id="unv_17256394"></span> | <a href="hide?id=17256394&amp;goto=news">hide</a> | <a href="item?id=17256394">196&nbsp;comments</a> </td>
309
+         </tr> 
310
+         <tr class="spacer" style="height:5px"></tr> 
311
+         <tr class="athing" id="17255418"> 
312
+          <td align="right" valign="top" class="title"><span class="rank">21.</span></td> 
313
+          <td valign="top" class="votelinks">
314
+           <center>
315
+            <a id="up_17255418" href="vote?id=17255418&amp;how=up&amp;goto=news">
316
+             <div class="votearrow" title="upvote"></div></a>
317
+           </center></td>
318
+          <td class="title"><a href="https://github.com/jonpe960/ufsm/tree/ufsm-0.1.0" class="storylink">Show HN: µFSM – A state chart library for embedded applications</a><span class="sitebit comhead"> (<a href="from?site=github.com"><span class="sitestr">github.com</span></a>)</span></td>
319
+         </tr>
320
+         <tr>
321
+          <td colspan="2"></td>
322
+          <td class="subtext"> <span class="score" id="score_17255418">108 points</span> by <a href="user?id=j0p" class="hnuser">j0p</a> <span class="age"><a href="item?id=17255418">10 hours ago</a></span> <span id="unv_17255418"></span> | <a href="hide?id=17255418&amp;goto=news">hide</a> | <a href="item?id=17255418">24&nbsp;comments</a> </td>
323
+         </tr> 
324
+         <tr class="spacer" style="height:5px"></tr> 
325
+         <tr class="athing" id="17257143"> 
326
+          <td align="right" valign="top" class="title"><span class="rank">22.</span></td> 
327
+          <td valign="top" class="votelinks">
328
+           <center>
329
+            <a id="up_17257143" href="vote?id=17257143&amp;how=up&amp;goto=news">
330
+             <div class="votearrow" title="upvote"></div></a>
331
+           </center></td>
332
+          <td class="title"><a href="https://blog.insightdatascience.com/reinforcement-learning-from-scratch-819b65f074d8" class="storylink">Reinforcement Learning from scratch</a><span class="sitebit comhead"> (<a href="from?site=insightdatascience.com"><span class="sitestr">insightdatascience.com</span></a>)</span></td>
333
+         </tr>
334
+         <tr>
335
+          <td colspan="2"></td>
336
+          <td class="subtext"> <span class="score" id="score_17257143">158 points</span> by <a href="user?id=e_ameisen" class="hnuser">e_ameisen</a> <span class="age"><a href="item?id=17257143">7 hours ago</a></span> <span id="unv_17257143"></span> | <a href="hide?id=17257143&amp;goto=news">hide</a> | <a href="item?id=17257143">22&nbsp;comments</a> </td>
337
+         </tr> 
338
+         <tr class="spacer" style="height:5px"></tr> 
339
+         <tr class="athing" id="17255140"> 
340
+          <td align="right" valign="top" class="title"><span class="rank">23.</span></td> 
341
+          <td valign="top" class="votelinks">
342
+           <center>
343
+            <a id="up_17255140" href="vote?id=17255140&amp;how=up&amp;goto=news">
344
+             <div class="votearrow" title="upvote"></div></a>
345
+           </center></td>
346
+          <td class="title"><a href="https://thebreakfastpost.com/2017/12/02/notes-on-idris/" class="storylink">Notes on Idris (2017)</a><span class="sitebit comhead"> (<a href="from?site=thebreakfastpost.com"><span class="sitestr">thebreakfastpost.com</span></a>)</span></td>
347
+         </tr>
348
+         <tr>
349
+          <td colspan="2"></td>
350
+          <td class="subtext"> <span class="score" id="score_17255140">128 points</span> by <a href="user?id=jxub" class="hnuser">jxub</a> <span class="age"><a href="item?id=17255140">11 hours ago</a></span> <span id="unv_17255140"></span> | <a href="hide?id=17255140&amp;goto=news">hide</a> | <a href="item?id=17255140">47&nbsp;comments</a> </td>
351
+         </tr> 
352
+         <tr class="spacer" style="height:5px"></tr> 
353
+         <tr class="athing" id="17260224"> 
354
+          <td align="right" valign="top" class="title"><span class="rank">24.</span></td> 
355
+          <td valign="top" class="votelinks">
356
+           <center>
357
+            <a id="up_17260224" href="vote?id=17260224&amp;how=up&amp;goto=news">
358
+             <div class="votearrow" title="upvote"></div></a>
359
+           </center></td>
360
+          <td class="title"><a href="https://hackernoon.com/fortran-is-still-a-thing-fc84df4cf638" class="storylink">Fortran is still a thing (2017)</a><span class="sitebit comhead"> (<a href="from?site=hackernoon.com"><span class="sitestr">hackernoon.com</span></a>)</span></td>
361
+         </tr>
362
+         <tr>
363
+          <td colspan="2"></td>
364
+          <td class="subtext"> <span class="score" id="score_17260224">42 points</span> by <a href="user?id=aphextron" class="hnuser">aphextron</a> <span class="age"><a href="item?id=17260224">2 hours ago</a></span> <span id="unv_17260224"></span> | <a href="hide?id=17260224&amp;goto=news">hide</a> | <a href="item?id=17260224">26&nbsp;comments</a> </td>
365
+         </tr> 
366
+         <tr class="spacer" style="height:5px"></tr> 
367
+         <tr class="athing" id="17257777"> 
368
+          <td align="right" valign="top" class="title"><span class="rank">25.</span></td> 
369
+          <td valign="top" class="votelinks">
370
+           <center>
371
+            <a id="up_17257777" href="vote?id=17257777&amp;how=up&amp;goto=news">
372
+             <div class="votearrow" title="upvote"></div></a>
373
+           </center></td>
374
+          <td class="title"><a href="http://www.emeraldprogramminglanguage.org/" class="storylink">The Emerald Programming Language</a><span class="sitebit comhead"> (<a href="from?site=emeraldprogramminglanguage.org"><span class="sitestr">emeraldprogramminglanguage.org</span></a>)</span></td>
375
+         </tr>
376
+         <tr>
377
+          <td colspan="2"></td>
378
+          <td class="subtext"> <span class="score" id="score_17257777">31 points</span> by <a href="user?id=palerdot" class="hnuser">palerdot</a> <span class="age"><a href="item?id=17257777">6 hours ago</a></span> <span id="unv_17257777"></span> | <a href="hide?id=17257777&amp;goto=news">hide</a> | <a href="item?id=17257777">32&nbsp;comments</a> </td>
379
+         </tr> 
380
+         <tr class="spacer" style="height:5px"></tr> 
381
+         <tr class="athing" id="17259483"> 
382
+          <td align="right" valign="top" class="title"><span class="rank">26.</span></td> 
383
+          <td valign="top" class="votelinks">
384
+           <center>
385
+            <a id="up_17259483" href="vote?id=17259483&amp;how=up&amp;goto=news">
386
+             <div class="votearrow" title="upvote"></div></a>
387
+           </center></td>
388
+          <td class="title"><a href="https://www.bloomberg.com/graphics/2015-paul-ford-what-is-code/" class="storylink">What is code</a><span class="sitebit comhead"> (<a href="from?site=bloomberg.com"><span class="sitestr">bloomberg.com</span></a>)</span></td>
389
+         </tr>
390
+         <tr>
391
+          <td colspan="2"></td>
392
+          <td class="subtext"> <span class="score" id="score_17259483">14 points</span> by <a href="user?id=henridf" class="hnuser">henridf</a> <span class="age"><a href="item?id=17259483">4 hours ago</a></span> <span id="unv_17259483"></span> | <a href="hide?id=17259483&amp;goto=news">hide</a> | <a href="item?id=17259483">2&nbsp;comments</a> </td>
393
+         </tr> 
394
+         <tr class="spacer" style="height:5px"></tr> 
395
+         <tr class="athing" id="17260148"> 
396
+          <td align="right" valign="top" class="title"><span class="rank">27.</span></td> 
397
+          <td valign="top" class="votelinks">
398
+           <center>
399
+            <a id="up_17260148" href="vote?id=17260148&amp;how=up&amp;goto=news">
400
+             <div class="votearrow" title="upvote"></div></a>
401
+           </center></td>
402
+          <td class="title"><a href="https://www.eff.org/deeplinks/2018/06/eus-copyright-proposal-extremely-bad-news-everyone-even-especially-wikipedia" class="storylink">The EU's Copyright Proposal Is Extremely Bad News for Everyone, Even Wikipedia</a><span class="sitebit comhead"> (<a href="from?site=eff.org"><span class="sitestr">eff.org</span></a>)</span></td>
403
+         </tr>
404
+         <tr>
405
+          <td colspan="2"></td>
406
+          <td class="subtext"> <span class="score" id="score_17260148">138 points</span> by <a href="user?id=Mononokay" class="hnuser">Mononokay</a> <span class="age"><a href="item?id=17260148">2 hours ago</a></span> <span id="unv_17260148"></span> | <a href="hide?id=17260148&amp;goto=news">hide</a> | <a href="item?id=172

+ 3494
- 0
results/nyc.txt
File diff suppressed because it is too large
View File


+ 1115
- 0
results/zipcode.txt
File diff suppressed because it is too large
View File


BIN
src/libs/jsoup-1.11.3.jar View File


+ 23
- 0
src/main/java/Main.java View File

@@ -0,0 +1,23 @@
1
+import org.jsoup.Jsoup;
2
+import org.jsoup.nodes.Document;
3
+import java.io.IOException;
4
+import java.io.PrintWriter;
5
+
6
+public class Main {
7
+
8
+    public static void main(String[] args) throws IOException {
9
+        websiteToFile("news.txt", "https://news.ycombinator.com/");
10
+        //websiteToFile("pixel.txt","http://lorempixel.com/400/200/");
11
+        websiteToFile("nyc.txt","https://www.nytimes.com/");
12
+        //websiteToFile("example.txt","https://example.com/");
13
+        websiteToFile("google.txt","https://www.google.com/");
14
+        websiteToFile("facebook.txt","https://www.facebook.com/");
15
+        websiteToFile("zipcode.txt","http://www.zipcodewilmington.com/");
16
+    }
17
+
18
+    private static void websiteToFile(String name, String url) throws IOException {
19
+        Document doc = Jsoup.connect(url).get();
20
+        PrintWriter out = new PrintWriter("result/" + name);
21
+        out.print(doc.toString());
22
+    }
23
+}

BIN
target/classes/Main.class View File