Commit b8f9d238 authored by Nicolas Peifer's avatar Nicolas Peifer

new data model due to advanced copyright filtering

parent cfb0c719
......@@ -2,7 +2,7 @@
<html>
<head>
<meta charset="UTF-8">
<title>Bücherbucht.de - gemeinfreie eBooks</title>
<title>Bücherbucht.de - Gemeinfreie eBooks</title>
<meta name="description"
content="Alternative zum gesperrten Portal Gutenberg.org mit ca. 1.200 gemeinfreien, deutschsprachigen eBooks zum Downloaden.">
<meta name="viewport"
......@@ -52,7 +52,7 @@
<a href='https://cand.pglaf.org/germany/index.html'>[pglaf.org]</a>
</p>
<p class="text-justify">
Zur Zeit werden auf Bücherbucht.de ca. 1.300 deutschsprachige Bücher
Zur Zeit werden auf Bücherbucht.de ca. 1.200 deutschsprachige Bücher
aufgelistet, bei denen das Urheberrecht nach EU-Recht verfallen ist
(Regelschutzfrist: 70 Jahre p. m. a.). Die Download-Links verweisen
auf einen Gutenberg.org-Mirror (Spiegelserver), d. h. Bücherbucht.de
......
......@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.codecoop.iceflower.gutenbergutils</groupId>
<artifactId>gutenberg-utils</artifactId>
<version>1.0.0</version>
<version>1.1.0-SNAPSHOT</version>
<build>
<sourceDirectory>src</sourceDirectory>
<plugins>
......
/*
* Copyright (c) 2018-2019 Nicolas Peifer
* This work is free. You can redistribute it and/or modify it under the
* terms of the Do What The Fuck You Want To Public License, Version 2,
* as published by Sam Hocevar. See the COPYING file for more details.
*/
package org.codecoop.iceflower.gutenbergutils;
public interface CliOptions {
......
/*
* Copyright (c) 2018-2019 Nicolas Peifer
* This work is free. You can redistribute it and/or modify it under the
* terms of the Do What The Fuck You Want To Public License, Version 2,
* as published by Sam Hocevar. See the COPYING file for more details.
*/
package org.codecoop.iceflower.gutenbergutils;
public class DownloadUrlBuilder {
......
/*
* Copyright (c) 2018-2019 Nicolas Peifer
* This work is free. You can redistribute it and/or modify it under the
* terms of the Do What The Fuck You Want To Public License, Version 2,
* as published by Sam Hocevar. See the COPYING file for more details.
*/
package org.codecoop.iceflower.gutenbergutils;
import java.util.ArrayList;
......
/*
* Copyright (c) 2018-2019 Nicolas Peifer
* This work is free. You can redistribute it and/or modify it under the
* terms of the Do What The Fuck You Want To Public License, Version 2,
* as published by Sam Hocevar. See the COPYING file for more details.
*/
package org.codecoop.iceflower.gutenbergutils;
import java.io.File;
......
/*
* Copyright (c) 2018-2019 Nicolas Peifer
* This work is free. You can redistribute it and/or modify it under the
* terms of the Do What The Fuck You Want To Public License, Version 2,
* as published by Sam Hocevar. See the COPYING file for more details.
*/
package org.codecoop.iceflower.gutenbergutils.database;
import java.sql.Connection;
......
/*
* Copyright (c) 2018-2019 Nicolas Peifer
* This work is free. You can redistribute it and/or modify it under the
* terms of the Do What The Fuck You Want To Public License, Version 2,
* as published by Sam Hocevar. See the COPYING file for more details.
*/
package org.codecoop.iceflower.gutenbergutils.database;
public interface DbParameters {
......
/*
* Copyright (c) 2018-2019 Nicolas Peifer
* This work is free. You can redistribute it and/or modify it under the
* terms of the Do What The Fuck You Want To Public License, Version 2,
* as published by Sam Hocevar. See the COPYING file for more details.
*/
package org.codecoop.iceflower.gutenbergutils.database;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.List;
import org.codecoop.iceflower.gutenbergutils.ebook.Ebook;
import org.codecoop.iceflower.gutenbergutils.database.entity.Book;
import org.codecoop.iceflower.gutenbergutils.database.entity.Person;
public class InsertOperation extends AbstractDbOperation {
......@@ -23,26 +33,76 @@ public class InsertOperation extends AbstractDbOperation {
/**
 * Drops and re-creates the tables used by this operation and then delegates to
 * {@code super.prepare(sqlCommand)}.
 *
 * @param sqlCommand the SQL command that is handed on to the superclass
 * @throws SQLException if one of the executed statements fails
 */
@Override
protected void prepare(String sqlCommand) throws SQLException {
// NOTE(review): the Statement objects obtained through createStatement() in
// this method are never closed in the visible code - confirm and close them.
Statement statement = getConnection().createStatement();
statement.execute(SqlCommands.DROP_TABLE);
// drop tables
// NOTE(review): book and person are dropped before book_author and
// book_translator, which reference them through foreign keys; depending on the
// database the relation tables may have to be dropped first - verify.
execute(SqlCommands.DROP_TABLE_BOOK);
execute(SqlCommands.DROP_TABLE_PERSON);
execute(SqlCommands.DROP_TABLE_BOOK_AUTHOR);
execute(SqlCommands.DROP_TABLE_BOOK_TRANSLATOR);
statement = getConnection().createStatement();
statement.execute(SqlCommands.CREATE_TABLE);
// create tables (entity tables first, then the relation tables that reference them)
execute(SqlCommands.CREATE_TABLE_BOOK);
execute(SqlCommands.CREATE_TABLE_PERSON);
execute(SqlCommands.CREATE_TABLE_BOOK_AUTHOR);
execute(SqlCommands.CREATE_TABLE_BOOK_TRANSLATOR);
super.prepare(sqlCommand);
}
public void insert(Ebook book) throws SQLException {
/**
 * Runs a single SQL statement on the current connection and closes the
 * statement again afterwards.
 *
 * @param sql the SQL text to execute
 * @throws SQLException if the statement cannot be created or executed
 */
private void execute(String sql) throws SQLException {
    try (Statement stmt = getConnection().createStatement()) {
        stmt.execute(sql);
    }
}
public void insert(Book book) throws SQLException {
// insert book
getPreparedStatement().setString(1, book.getId());
getPreparedStatement().setString(2, book.getTitle());
getPreparedStatement().setString(3, book.getSubtitle());
getPreparedStatement().setString(4, book.getLanguage());
getPreparedStatement().setString(5, book.getAuthor().getName());
getPreparedStatement().setInt(6, book.getAuthor().getDeathYear());
getPreparedStatement().addBatch();
if (++writeCount >= batchSize) {
endBatch();
endBatch();
// insert authors and translators
insertPersons(book.getAuthors(), SqlCommands.INSERT_BOOK_AUTHOR, book.getId());
insertPersons(book.getTranslators(), SqlCommands.INSERT_BOOK_TRANSLATOR, book.getId());
}
/**
 * Links every given person to a book. A person row is created first when no row
 * with the same name and death year exists yet.
 *
 * @param persons           the authors or translators to link to the book
 * @param insertRelationSql the INSERT statement for the relation table; the book
 *                          id is bound to parameter 1 and the person id to
 *                          parameter 2
 * @param bookId            the id of the book the persons belong to
 * @throws SQLException if a statement fails or if the id of a freshly inserted
 *                      person cannot be determined
 */
private void insertPersons(List<Person> persons, String insertRelationSql, String bookId) throws SQLException {
    if (persons.isEmpty()) {
        // nothing to link, so do not prepare any statement
        return;
    }
    // The relation statement is the same for every person: prepare it once
    // instead of once per loop iteration.
    try (PreparedStatement relationStatement = getConnection().prepareStatement(insertRelationSql)) {
        for (Person person : persons) {
            // 1. try to select an existing author/translator
            Long personId = fetchId(person);
            // 2. insert the person if it does not exist yet
            if (personId == null) {
                try (PreparedStatement personStatement = getConnection()
                        .prepareStatement(SqlCommands.INSERT_PERSON)) {
                    personStatement.setString(1, person.getName());
                    personStatement.setInt(2, person.getDeathYear());
                    personStatement.execute();
                }
                personId = fetchId(person);
                if (personId == null) {
                    // Fail with a meaningful error instead of a NullPointerException
                    // caused by unboxing the missing id below.
                    throw new SQLException("Could not determine the id of person '" + person.getName()
                            + "' (death year " + person.getDeathYear() + ") after inserting it");
                }
            }
            // 3. create the relation between book and person
            relationStatement.setString(1, bookId);
            relationStatement.setLong(2, personId);
            relationStatement.execute();
        }
    }
}
/**
 * Looks up the id of the person row that matches the name and death year of the
 * given person.
 *
 * @param author the person to look up
 * @return the id from the first matching row, or {@code null} if no row matches
 * @throws SQLException if the query fails
 */
private Long fetchId(Person author) throws SQLException {
    try (PreparedStatement query = getConnection().prepareStatement(SqlCommands.SELECT_PERSON_ID)) {
        query.setString(1, author.getName());
        query.setInt(2, author.getDeathYear());
        try (ResultSet rows = query.executeQuery()) {
            // only the first row is of interest
            if (rows.next()) {
                return rows.getLong(1);
            }
            return null;
        }
    }
}
private void endBatch() throws SQLException {
......@@ -53,12 +113,12 @@ public class InsertOperation extends AbstractDbOperation {
/**
 * Creates the indexes by executing {@code SqlCommands.CREATE_INDEX} and
 * {@code SqlCommands.CREATE_INDEX_ON_BOOK}.
 *
 * @throws SQLException if one of the statements fails
 */
private void createIndex() throws SQLException {
// NOTE(review): this Statement is never closed in the visible code - confirm
// and close it.
Statement statement = getConnection().createStatement();
statement.execute(SqlCommands.CREATE_INDEX);
// NOTE(review): SqlCommands.CREATE_INDEX_ON_PERSON is not executed here -
// confirm whether the person index is created elsewhere.
statement.execute(SqlCommands.CREATE_INDEX_ON_BOOK);
}
/**
 * Starts this operation by passing the insert statement constants of
 * {@code SqlCommands} to {@code begin(String)}.
 *
 * @throws SQLException if {@code begin(String)} fails
 */
@Override
public void begin() throws SQLException {
begin(SqlCommands.INSERT);
begin(SqlCommands.INSERT_BOOK);
}
}
/*
* Copyright (c) 2018-2019 Nicolas Peifer
* This work is free. You can redistribute it and/or modify it under the
* terms of the Do What The Fuck You Want To Public License, Version 2,
* as published by Sam Hocevar. See the COPYING file for more details.
*/
package org.codecoop.iceflower.gutenbergutils.database;
import java.sql.ResultSet;
import java.sql.SQLException;
import org.codecoop.iceflower.gutenbergutils.ebook.Ebook;
import org.codecoop.iceflower.gutenbergutils.parser.rdf.Author;
import org.codecoop.iceflower.gutenbergutils.database.entity.Book;
import org.codecoop.iceflower.gutenbergutils.database.entity.Person;
public class SelectOperation extends AbstractDbOperation {
private String wantedLanguage = "";
......@@ -15,16 +21,18 @@ public class SelectOperation extends AbstractDbOperation {
this.wantedLanguage = language;
}
public Ebook getNextBook() {
public Book getNextBook() {
try {
if (!resultSet.next()) {
return null;
}
Author author = new Author();
Book result = new Book(resultSet.getString(1), resultSet.getString(2), resultSet.getString(3),
resultSet.getString(4));
Person author = new Person();
author.setName(resultSet.getString(5));
// id, title, subtitle, language, author
return new Ebook(resultSet.getString(1), resultSet.getString(2), resultSet.getString(3),
resultSet.getString(4), author);
result.getAuthors().add(author);
return result;
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
......@@ -43,9 +51,9 @@ public class SelectOperation extends AbstractDbOperation {
/**
 * Starts this operation: selects the query depending on whether a wanted
 * language is set, executes it and stores the result in {@code resultSet}.
 *
 * @throws SQLException if preparing or executing the query fails
 */
@Override
public void begin() throws SQLException {
// an empty wanted language selects the books of all languages
if (wantedLanguage.isEmpty()) {
begin(SqlCommands.SELECT_ALL);
begin(SqlCommands.SELECT_BOOKS);
} else {
// NOTE(review): the language queries contain a '?' parameter; binding it is
// not visible in this method - confirm it is set before executeQuery().
begin(SqlCommands.SELECT_WHERE_LANGUAGE);
begin(SqlCommands.SELECT_BOOKS_WHERE_LANGUAGE);
}
resultSet = getPreparedStatement().executeQuery();
}
......
/*
* Copyright (c) 2018-2019 Nicolas Peifer
* This work is free. You can redistribute it and/or modify it under the
* terms of the Do What The Fuck You Want To Public License, Version 2,
* as published by Sam Hocevar. See the COPYING file for more details.
*/
package org.codecoop.iceflower.gutenbergutils.database;
/**
 * SQL statements used by the database operations, kept as {@link String}
 * constants.
 *
 * <p>
 * The two {@code SELECT_BOOKS...} queries return one row per book/author pair.
 * They skip every book that has at least one author or translator whose
 * {@code death_year} is {@code -1} or lies 70 years or less in the past.
 */
public interface SqlCommands {
    String CREATE_TABLE = "CREATE TABLE IF NOT EXISTS book " + "(id varchar(11) primary key," + "title varchar(500),"
            + "subtitle varchar(500)," + "language varchar(20)," + "author varchar(75)," + "death_year_of_author int);";
    String CREATE_INDEX = "CREATE INDEX ON book (author, title);";
    String INSERT = "INSERT INTO book (id, title, subtitle, language, author, death_year_of_author) VALUES (?,?,?,?,?,?);";
    // https://de.wikipedia.org/wiki/Regelschutzfrist
    String SELECT_WHERE_LANGUAGE = "SELECT id, title, subtitle, language, author FROM book WHERE language = ? AND death_year_of_author > -1 AND (YEAR(CURRENT_DATE())-death_year_of_author)>70 ORDER BY author, title;";
    String SELECT_ALL = "SELECT id, title, subtitle, language, author FROM book where death_year_of_author > -1 AND (YEAR(CURRENT_DATE())-death_year_of_author)>70 ORDER BY author, title;";
    String DROP_TABLE = "DROP TABLE IF EXISTS book;";
    //@formatter:off

    // drop tables
    String DROP_TABLE_BOOK="DROP TABLE IF EXISTS book;";
    String DROP_TABLE_PERSON = "DROP TABLE IF EXISTS person;";
    String DROP_TABLE_BOOK_AUTHOR = "DROP TABLE IF EXISTS book_author;";
    String DROP_TABLE_BOOK_TRANSLATOR = "DROP TABLE IF EXISTS book_translator;";

    // create entity tables
    String CREATE_TABLE_BOOK = "CREATE TABLE IF NOT EXISTS book "+
            "("+
                "id varchar(11) primary key,"+
                "title varchar(500)," +
                "subtitle varchar(500)," +
                "language varchar(20)"+
            ");";
    String CREATE_TABLE_PERSON = "CREATE TABLE IF NOT EXISTS person "+
            "(" +
                "id bigint auto_increment primary key," +
                "name varchar(75),"+
                "death_year int"+
            ");";

    // create relation tables
    String CREATE_TABLE_BOOK_AUTHOR = "CREATE TABLE IF NOT EXISTS book_author "+
            "("+
                "book_id varchar(11), "+
                "person_id bigint, "+
                "FOREIGN KEY(book_id) REFERENCES book(id), "+
                "FOREIGN KEY(person_id) REFERENCES person(id)"+
            ");";
    String CREATE_TABLE_BOOK_TRANSLATOR = "CREATE TABLE IF NOT EXISTS book_translator "+
            "("+
                "book_id varchar(11),"+
                "person_id bigint, "+
                "FOREIGN KEY(book_id) REFERENCES book(id), "+
                "FOREIGN KEY(person_id) REFERENCES person(id)"+
            ");";

    // create indices
    String CREATE_INDEX_ON_BOOK = "CREATE INDEX ON book (title);";
    String CREATE_INDEX_ON_PERSON = "CREATE INDEX ON person (name);";

    // inserts
    String INSERT_BOOK = "INSERT INTO book "
            + "(id, title, subtitle, language) "
            + "VALUES "
            + "(?,?,?,?);";
    String INSERT_PERSON ="INSERT INTO person "
            + "(name, death_year) "
            + "VALUES "
            + "(?,?);";
    String INSERT_BOOK_AUTHOR ="INSERT INTO book_author "
            + "(book_id, person_id)"
            + "VALUES "
            + "(?,?);";
    String INSERT_BOOK_TRANSLATOR ="INSERT INTO book_translator "
            + "(book_id, person_id)"
            + "VALUES "
            + "(?,?);";

    // selects
    String SELECT_PERSON_ID="SELECT id FROM person WHERE name = ? AND death_year = ? ;";

    // books of one language (parameter 1) that pass the copyright filter
    String SELECT_BOOKS_WHERE_LANGUAGE =
            "SELECT " +
                "book.id," +
                "title," +
                "subtitle,"+
                "language,"+
                "author.name "+
            "FROM "+
                "book_author, book, person as author "+
            "WHERE "+
                "book_author.book_id = book.id "+
                "AND book_author.person_id = author.id "+
                "AND language = ? "+
                // no author as copyright holder
                "AND NOT EXISTS ("+
                    "SELECT 1 "+
                    "FROM "+
                        "book_author, person as author "+
                    "WHERE "+
                        "book.id = book_author.book_id "+
                        "AND book_author.person_id = author.id "+
                        "AND (author.death_year = -1 "+
                            "OR (YEAR(CURRENT_DATE()) - author.death_year) <= 70 )"+
                ") "+
                // no translator as copyright holder
                "AND NOT EXISTS ("+
                    "SELECT 1 "+
                    "FROM "+
                        "book_translator, person as translator "+
                    "WHERE "+
                        "book.id = book_translator.book_id "+
                        "AND book_translator.person_id = translator.id "+
                        "AND (translator.death_year = -1 "+
                            "OR (YEAR(CURRENT_DATE()) - translator.death_year) <= 70)"+
                ") "+
            "ORDER BY "+
                "author.name,"+
                "book.title"+
            ";";

    // books of all languages that pass the copyright filter
    String SELECT_BOOKS =
            "SELECT " +
                "book.id," +
                "title," +
                "subtitle,"+
                "language,"+
                "author.name "+
            "FROM "+
                "book_author, book, person as author "+
            "WHERE "+
                "book_author.book_id = book.id "+
                // the trailing space is required: without it the statement reads
                // "author.idAND NOT EXISTS (" and is not valid SQL
                "AND book_author.person_id = author.id "+
                // no author as copyright holder
                "AND NOT EXISTS ("+
                    "SELECT 1 "+
                    "FROM "+
                        "book_author, person as author "+
                    "WHERE "+
                        "book.id = book_author.book_id "+
                        "AND book_author.person_id = author.id "+
                        "AND (author.death_year = -1 "+
                            "OR (YEAR(CURRENT_DATE()) - author.death_year) <= 70)"+
                ") "+
                // no translator as copyright holder
                "AND NOT EXISTS ("+
                    "SELECT 1 "+
                    "FROM "+
                        "book_translator, person as translator "+
                    "WHERE "+
                        "book.id = book_translator.book_id "+
                        "AND book_translator.person_id = translator.id "+
                        "AND (translator.death_year = -1 "+
                            "OR (YEAR(CURRENT_DATE()) - translator.death_year) <= 70)"+
                ") "+
            "ORDER BY "+
                "author.name,"+
                "book.title"+
            ";";
}
package org.codecoop.iceflower.gutenbergutils.ebook;
import org.codecoop.iceflower.gutenbergutils.parser.rdf.Author;
public class Ebook {
/*
* Copyright (c) 2018-2019 Nicolas Peifer
* This work is free. You can redistribute it and/or modify it under the
* terms of the Do What The Fuck You Want To Public License, Version 2,
* as published by Sam Hocevar. See the COPYING file for more details.
*/
package org.codecoop.iceflower.gutenbergutils.database.entity;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
public class Book {
private String id = "";
private String title = "";
private String subtitle = "";
private String language = "English";
private Author author;
private List<Person> authors = new ArrayList<>();
private List<Person> translators = new ArrayList<>();
public Ebook(String id) {
public Book(String id) {
this.id = id;
}
public Ebook(String id, String title, Author author) {
public Book(String id, String title) {
this.id = id;
this.title = title;
setAuthor(author);
}
public Ebook(String id, String title, String subtitle, String language, Author author) {
public Book(String id, String title, String subtitle, String language) {
super();
this.id = id;
this.title = title;
this.subtitle = subtitle;
this.language = language;
setAuthor(author);
}
private void setAuthor(Author newAuthor) {
if (!newAuthor.getName().isEmpty()) {
newAuthor.setName(newAuthor.getName().substring(0, 1).toUpperCase() + newAuthor.getName().substring(1));
}
this.author = newAuthor;
}
public String getId() {
......@@ -59,8 +59,12 @@ public class Ebook {
this.subtitle = subtitle;
}
public Author getAuthor() {
return author;
public List<Person> getAuthors() {
return authors;
}
/**
 * Returns the translators of this book.
 *
 * @return the translator list held by this book; the list itself is returned,
 *         not a copy
 */
public List<Person> getTranslators() {
    return this.translators;
}
public String getLanguage() {
......@@ -80,7 +84,7 @@ public class Ebook {
builder.append(":");
builder.append(subtitle);
builder.append(":");
builder.append(author);
builder.append(authors.stream().map(Object::toString).collect(Collectors.joining(" & ")));
builder.append(":");
builder.append(language);
......
package org.codecoop.iceflower.gutenbergutils.parser.rdf;
/*
* Copyright (c) 2018-2019 Nicolas Peifer
* This work is free. You can redistribute it and/or modify it under the
* terms of the Do What The Fuck You Want To Public License, Version 2,
* as published by Sam Hocevar. See the COPYING file for more details.
*/
package org.codecoop.iceflower.gutenbergutils.database.entity;
public class Author {
public class Person {
private String name = "";
private int deathYear = -1;
......
/*
* Copyright (c) 2018-2019 Nicolas Peifer
* This work is free. You can redistribute it and/or modify it under the
* terms of the Do What The Fuck You Want To Public License, Version 2,
* as published by Sam Hocevar. See the COPYING file for more details.
*/
package org.codecoop.iceflower.gutenbergutils.html;
import java.io.BufferedWriter;
......@@ -15,15 +21,15 @@ import java.util.stream.IntStream;
import org.codecoop.iceflower.gutenbergutils.DownloadUrlBuilder;
import org.codecoop.iceflower.gutenbergutils.database.SelectOperation;
import org.codecoop.iceflower.gutenbergutils.ebook.Ebook;
import org.codecoop.iceflower.gutenbergutils.database.entity.Book;
import org.codecoop.iceflower.gutenbergutils.io.FileUtil;
/**
* This class writes {@link Ebook}s to HTML files. The {@link Ebook}s are split
* into groups by the first letter of {@link Ebook#getAuthor()}. Each group is
* stored in an own HTML file. Furthermore, the first letter of
* {@link Ebook#getAuthor()} is used to create a navigation bar in order to
* allow easy navigation between the separate HTML files.
* This class writes {@link Book}s to HTML files. The {@link Book}s are split
* into groups by the first letter of the name of their first author (the first
* element of {@link Book#getAuthors()}). Each group is stored in its own HTML
* file. Furthermore, that first letter is used to create a navigation bar in
* order to allow easy navigation between the separate HTML files.
*
*/
public class HtmlWriter {
......@@ -63,18 +69,18 @@ public class HtmlWriter {
e.printStackTrace();
}
Ebook ebook;
Book ebook;
while ((ebook = selectOperation.getNextBook()) != null) {
String firstLetterOfAuthor = (ebook.getAuthor().getName().isEmpty()) ? NO_AUTHOR
: ebook.getAuthor().getName().substring(0, 1).toUpperCase();
String firstLetterOfAuthor = (ebook.getAuthors().get(0).getName().isEmpty()) ? NO_AUTHOR
: ebook.getAuthors().get(0).getName().substring(0, 1).toUpperCase();
if (!firstLetterOfAuthorSet.contains(firstLetterOfAuthor)) {
Logger.getGlobal().log(Level.WARNING, "The first letter of the author is invalid. Book: " + ebook);
continue;
}
boolean isNoAuthorBeginning = ebook.getAuthor().getName().isEmpty()
boolean isNoAuthorBeginning = ebook.getAuthors().get(0).getName().isEmpty()
&& !firstLetterOfPreviousAuthor.equals(NO_AUTHOR);
boolean isFirstAuthorLetterDifferent = !ebook.getAuthor().getName().isEmpty()
&& !ebook.getAuthor().getName().substring(0, 1).equalsIgnoreCase(firstLetterOfPreviousAuthor);
boolean isFirstAuthorLetterDifferent = !ebook.getAuthors().get(0).getName().isEmpty() && !ebook.getAuthors()
.get(0).getName().substring(0, 1).equalsIgnoreCase(firstLetterOfPreviousAuthor);
if (isNoAuthorBeginning || isFirstAuthorLetterDifferent) {
// author starts with a new letter
if (currentHtmlFile != null) {
......@@ -137,12 +143,12 @@ public class HtmlWriter {
return sb.toString();
}
private String bookToTableRow(Ebook ebook) {
private String bookToTableRow(Book ebook) {
// author, title, subtitle, language, Download
StringBuffer sb = new StringBuffer();
sb.append("<tr>\n");
sb.append("<td>");
sb.append(ebook.getAuthor().getName());
sb.append(ebook.getAuthors().get(0).getName());
sb.append("</td>");
sb.append("<td>");
sb.append(ebook.getTitle());
......
/*
* Copyright (c) 2018-2019 Nicolas Peifer
* This work is free. You can redistribute it and/or modify it under the
* terms of the Do What The Fuck You Want To Public License, Version 2,
* as published by Sam Hocevar. See the COPYING file for more details.
*/
package org.codecoop.iceflower.gutenbergutils.io;
import java.io.File;
......
/*
* Copyright (c) 2018-2019 Nicolas Peifer
* This work is free. You can redistribute it and/or modify it under the
* terms of the Do What The Fuck You Want To Public License, Version 2,
* as published by Sam Hocevar. See the COPYING file for more details.
*/
package org.codecoop.iceflower.gutenbergutils.io;
import java.io.BufferedReader;
......
/*
* Copyright (c) 2018-2019 Nicolas Peifer
* This work is free. You can redistribute it and/or modify it under the
* terms of the Do What The Fuck You Want To Public License, Version 2,
* as published by Sam Hocevar. See the COPYING file for more details.
*/
package org.codecoop.iceflower.gutenbergutils.io;
import java.io.IOException;
......@@ -5,7 +11,7 @@ import java.nio.file.Path;
import java.sql.SQLException;
import org.codecoop.iceflower.gutenbergutils.database.InsertOperation;
import org.codecoop.iceflower.gutenbergutils.ebook.Ebook;
import org.codecoop.iceflower.gutenbergutils.database.entity.Book;
import org.codecoop.iceflower.gutenbergutils.parser.rdf.RdfParser;
public class RdfFileCrawler extends FileCrawler {
......@@ -33,7 +39,7 @@ public class RdfFileCrawler extends FileCrawler {
@Override
protected void process(Path filePath) {
try {
Ebook ebook = rdfParser.parse(FileUtil.readFile(filePath));
Book ebook = rdfParser.parse(FileUtil.readFile(filePath));
if (ebook != null) {
setFileCount(getFileCount() + 1);
insertOperation.insert(ebook);
......
/*
* Copyright (c) 2018-2019 Nicolas Peifer
* This work is free. You can redistribute it and/or modify it under the
* terms of the Do What The Fuck You Want To Public License, Version 2,
* as published by Sam Hocevar. See the COPYING file for more details.
*/
package org.codecoop.iceflower.gutenbergutils.parser.rdf;
/**
 * String helpers for the RDF parser.
 */
public class ParserUtils {

    /**
     * Convert the first letter of a string to upper-case and return the result.
     * The conversion uses {@link java.util.Locale#ROOT}, so the result does not
     * depend on the default locale of the JVM (for example, "i" always becomes
     * "I", also under a Turkish default locale).
     *
     * @param string the {@link String} whose first letter should be converted to
     *               upper-case; must not be {@code null}.
     * @return the input {@link String} with the first letter capitalized; an empty
     *         input is returned unchanged.
     */
    public static String capitalizeFirstLetter(String string) {
        if (string.isEmpty()) {
            return string;
        }
        return string.substring(0, 1).toUpperCase(java.util.Locale.ROOT) + string.substring(1);
    }
}