Commit ed0a4927 authored by Nicolas Peifer

improving copyright filtering (including all involved persons)

parent b124aeee
......@@ -38,13 +38,13 @@ public class InsertOperation extends AbstractDbOperation {
execute(SqlCommands.DROP_TABLE_BOOK);
execute(SqlCommands.DROP_TABLE_PERSON);
execute(SqlCommands.DROP_TABLE_BOOK_AUTHOR);
execute(SqlCommands.DROP_TABLE_BOOK_TRANSLATOR);
execute(SqlCommands.DROP_TABLE_BOOK_CONTRIBUTOR);
// create tables
execute(SqlCommands.CREATE_TABLE_BOOK);
execute(SqlCommands.CREATE_TABLE_PERSON);
execute(SqlCommands.CREATE_TABLE_BOOK_AUTHOR);
execute(SqlCommands.CREATE_TABLE_BOOK_TRANSLATOR);
execute(SqlCommands.CREATE_TABLE_BOOK_CONTRIBUTOR);
super.prepare(sqlCommand);
}
......@@ -66,7 +66,7 @@ public class InsertOperation extends AbstractDbOperation {
// insert authors and translators
insertPersons(book.getAuthors(), SqlCommands.INSERT_BOOK_AUTHOR, book.getId());
insertPersons(book.getTranslators(), SqlCommands.INSERT_BOOK_TRANSLATOR, book.getId());
insertPersons(book.getContributors(), SqlCommands.INSERT_BOOK_CONTRIBUTOR, book.getId());
}
private void insertPersons(List<Person> persons, String insertRelationSql, String bookId) throws SQLException {
......@@ -94,8 +94,7 @@ public class InsertOperation extends AbstractDbOperation {
}
private Long fetchId(Person author) throws SQLException {
String query = (author.getDeathYear() != null) ? SqlCommands.SELECT_PERSON_ID
: SqlCommands.SELECT_PERSON_ID2;
String query = (author.getDeathYear() != null) ? SqlCommands.SELECT_PERSON_ID : SqlCommands.SELECT_PERSON_ID2;
try (PreparedStatement ps = getConnection().prepareStatement(query);) {
ps.setString(1, author.getName());
setIntOnPreparedStatement(2, author.getDeathYear(), ps);
......
......@@ -13,7 +13,7 @@ public interface SqlCommands {
String DROP_TABLE_BOOK="DROP TABLE IF EXISTS book;";
String DROP_TABLE_PERSON = "DROP TABLE IF EXISTS person;";
String DROP_TABLE_BOOK_AUTHOR = "DROP TABLE IF EXISTS book_author;";
String DROP_TABLE_BOOK_TRANSLATOR = "DROP TABLE IF EXISTS book_translator;";
String DROP_TABLE_BOOK_CONTRIBUTOR = "DROP TABLE IF EXISTS book_contributor;";
// create entity tables
String CREATE_TABLE_BOOK = "CREATE TABLE IF NOT EXISTS book "+
......@@ -38,7 +38,7 @@ public interface SqlCommands {
"FOREIGN KEY(book_id) REFERENCES book(id), "+
"FOREIGN KEY(person_id) REFERENCES person(id)"+
");";
String CREATE_TABLE_BOOK_TRANSLATOR = "CREATE TABLE IF NOT EXISTS book_translator "+
String CREATE_TABLE_BOOK_CONTRIBUTOR = "CREATE TABLE IF NOT EXISTS book_contributor "+
"("+
"book_id varchar(11),"+
"person_id bigint, "+
......@@ -63,7 +63,7 @@ public interface SqlCommands {
+ "(book_id, person_id)"
+ "VALUES "
+ "(?,?);";
String INSERT_BOOK_TRANSLATOR ="INSERT INTO book_translator "
String INSERT_BOOK_CONTRIBUTOR ="INSERT INTO book_contributor "
+ "(book_id, person_id)"
+ "VALUES "
+ "(?,?);";
......@@ -96,16 +96,16 @@ public interface SqlCommands {
"AND (author.death_year IS NULL "+
"OR (YEAR(CURRENT_DATE()) - author.death_year) <= 70 )"+
") "+
// no translator as copyright holder
// no contributor as copyright holder
"AND NOT EXISTS ("+
"SELECT 1 "+
"FROM "+
"book_translator, person as translator "+
"book_contributor, person as contributor "+
"WHERE "+
"book.id = book_translator.book_id "+
"AND book_translator.person_id = translator.id "+
"AND (translator.death_year IS NULL "+
"OR (YEAR(CURRENT_DATE()) - translator.death_year) <= 70)"+
"book.id = book_contributor.book_id "+
"AND book_contributor.person_id = contributor.id "+
"AND (contributor.death_year IS NULL "+
"OR (YEAR(CURRENT_DATE()) - contributor.death_year) <= 70)"+
") "+
"ORDER BY "+
"author.name,"+
......@@ -135,16 +135,16 @@ public interface SqlCommands {
"AND (author.death_year IS NULL "+
"OR (YEAR(CURRENT_DATE()) - author.death_year) <= 70)"+
") "+
// no translator as copyright holder
// no contributor as copyright holder
"AND NOT EXISTS ("+
"SELECT 1 "+
"FROM "+
"book_translator, person as translator "+
"book_contributor, person as contributor "+
"WHERE "+
"book.id = book_translator.book_id "+
"AND book_translator.person_id = translator.id "+
"AND (translator.death_year IS NULL "+
"OR (YEAR(CURRENT_DATE()) - translator.death_year) <= 70)"+
"book.id = book_contributor.book_id "+
"AND book_contributor.person_id = contributor.id "+
"AND (contributor.death_year IS NULL "+
"OR (YEAR(CURRENT_DATE()) - contributor.death_year) <= 70)"+
") "+
"ORDER BY "+
"author.name,"+
......
......@@ -16,7 +16,7 @@ public class Book {
private String subtitle = "";
private String language = "English";
private List<Person> authors = new ArrayList<>();
private List<Person> translators = new ArrayList<>();
private List<Person> contributors = new ArrayList<>();
public Book(String id) {
this.id = id;
......@@ -63,8 +63,8 @@ public class Book {
return authors;
}
public List<Person> getTranslators() {
return translators;
public List<Person> getContributors() {
return contributors;
}
public String getLanguage() {
......
......@@ -9,6 +9,7 @@ package org.codecoop.iceflower.gutenbergutils.database.entity;
public class Person {
private String name = "";
private Integer deathYear = null;
private String role = "";
public String getName() {
return name;
......@@ -37,4 +38,12 @@ public class Person {
return builder.toString();
}
/**
 * Returns the role stored for this person.
 *
 * @return the current value of the {@code role} field
 */
public String getRole() {
    return this.role;
}
/**
 * Stores the role for this person, replacing any previously stored value.
 *
 * @param role the role label to keep on this person
 */
public void setRole(String role) {
    this.role = role;
}
}
......@@ -20,7 +20,7 @@ import org.codecoop.iceflower.gutenbergutils.database.entity.Person;
public class RdfParser {
private Pattern bookIdPattern = Pattern.compile("\\<pgterms\\:ebook rdf\\:about=\"ebooks\\/(\\d+)\"\\>");
private Pattern creatorPattern = Pattern.compile("\\<dcterms\\:creator\\>(.+?)\\<\\/dcterms\\:creator\\>",
private Pattern creatorPattern = Pattern.compile("\\<dcterms\\:(creator)\\>(.+?)\\<\\/dcterms\\:creator\\>",
Pattern.DOTALL);
private Pattern personNamePattern = Pattern.compile("\\<pgterms\\:name\\>(.*)\\<\\/pgterms\\:name\\>");
......@@ -28,7 +28,7 @@ public class RdfParser {
private Pattern deathYearPattern = Pattern
.compile("\\<pgterms\\:deathdate .*\\>(\\-?\\d+)\\<\\/pgterms\\:deathdate\\>");
private Pattern translatorPattern = Pattern.compile("\\<marcrel\\:trl\\>(.+?)\\<\\/marcrel\\:trl\\>",
// Matches any <marcrel:ROLE>...</marcrel:ROLE> element. Group 1 is the role name taken from the
// opening tag, group 2 is the element body. The closing tag uses the back-reference \1 so that
// it must name the same role as the opening tag; an unconstrained \w+ would let an element be
// "closed" by a different marcrel tag and attribute the body to the wrong role.
private Pattern contributorPattern = Pattern.compile("\\<marcrel\\:(\\w+)\\>(.+?)\\<\\/marcrel\\:\\1\\>",
Pattern.DOTALL);
private Pattern languagePattern = Pattern.compile("\\<dcterms\\:language\\>(.+)\\<\\/dcterms\\:language\\>",
......@@ -51,7 +51,7 @@ public class RdfParser {
String bookId = extractBookId(rdfString);
List<Person> authors = new ArrayList<>();
List<Person> translators = new ArrayList<>();
List<Person> contributors = new ArrayList<>();
try {
authors = extractPersons(rdfString, creatorPattern);
......@@ -60,7 +60,7 @@ public class RdfParser {
}
try {
translators = extractPersons(rdfString, translatorPattern);
contributors = extractPersons(rdfString, contributorPattern);
} catch (RuntimeException ex) {
Logger.getGlobal().log(Level.WARNING, "Error parsing book " + bookId + ". " + ex.getMessage());
}
......@@ -71,7 +71,7 @@ public class RdfParser {
Book result = new Book(bookId, titles.get(0));
result.getAuthors().addAll(authors);
result.getTranslators().addAll(translators);
result.getContributors().addAll(contributors);
result.setLanguage(language);
if (titles.size() > 1) {
result.setSubtitle(titles.stream().skip(1).collect(Collectors.joining("\n")));
......@@ -95,10 +95,10 @@ public class RdfParser {
private List<Person> extractPersons(String rdfString, Pattern personPattern) {
List<Person> persons = new ArrayList<>();
Matcher creatorMatcher = personPattern.matcher(rdfString);
Matcher personMatcher = personPattern.matcher(rdfString);
while (creatorMatcher.find()) {
String creator = creatorMatcher.group(1);
while (personMatcher.find()) {
String creator = personMatcher.group(2);
// extract author name
Matcher personNameMatcher = personNamePattern.matcher(creator);
if (!personNameMatcher.find()) {
......@@ -116,6 +116,7 @@ public class RdfParser {
Person person = new Person();
person.setName(ParserUtils.capitalizeFirstLetter(personNameMatcher.group(1)));
person.setDeathYear(deathYear);
person.setRole(personMatcher.group(1));
persons.add(person);
}
return persons;
......
......@@ -4,6 +4,7 @@ import static org.junit.Assert.assertNull;
import static org.junit.jupiter.api.Assertions.assertEquals;
import org.codecoop.iceflower.gutenbergutils.database.entity.Book;
import org.codecoop.iceflower.gutenbergutils.database.entity.Person;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
......@@ -74,18 +75,18 @@ class RdfParserTest {
void testTranslator() {
RdfParser rdfParser = new RdfParser();
Book ebook = rdfParser.parse(TestConstants.RDF_WITH_TRANSLATOR_ELEMENT);
assertEquals(1, ebook.getTranslators().size());
assertEquals(1, ebook.getContributors().size());
assertEquals("Sologub, Fjodor", ebook.getAuthors().get(0).getName());
assertEquals("Walter, Reinhold von", ebook.getTranslators().get(0).getName());
assertEquals("Walter, Reinhold von", ebook.getContributors().get(0).getName());
}
@Test
void testDeadTranslator() {
RdfParser rdfParser = new RdfParser();
Book ebook = rdfParser.parse(TestConstants.RDF_WITH_DEAD_TRANSLATOR);
assertEquals(1, ebook.getTranslators().size());
assertEquals("Howard, Velma Swanston", ebook.getTranslators().get(0).getName());
assertEquals(1937, ebook.getTranslators().get(0).getDeathYear());
assertEquals(1, ebook.getContributors().size());
assertEquals("Howard, Velma Swanston", ebook.getContributors().get(0).getName());
assertEquals(1937, ebook.getContributors().get(0).getDeathYear());
}
@Test
......@@ -95,4 +96,29 @@ class RdfParserTest {
assertEquals(-348, ebook.getAuthors().get(0).getDeathYear());
}
/**
 * Parses the fixture with several contributor types and checks that the author carries the
 * role "creator" and that the three contributors are returned in order with their name,
 * death year and role ("edt", "ctb", "trl").
 */
@Test
void testMultipleContributors() {
    Book ebook = new RdfParser().parse(TestConstants.RDF_WITH_MULTIPLE_CONTRIBUTOR_TYPES);

    // author
    assertEquals("Dostoyevsky, Fyodor", ebook.getAuthors().get(0).getName());
    assertEquals("creator", ebook.getAuthors().get(0).getRole());

    // the number of contributors is checked before any of them is accessed
    assertEquals(3, ebook.getContributors().size());

    // first contributor
    Person first = ebook.getContributors().get(0);
    assertEquals("Moeller van den Bruck, Arthur", first.getName());
    assertEquals(1925, first.getDeathYear());
    assertEquals("edt", first.getRole());

    // second contributor
    Person second = ebook.getContributors().get(1);
    assertEquals("Merezhkovsky, Dmitry Sergeyevich", second.getName());
    assertEquals(1941, second.getDeathYear());
    assertEquals("ctb", second.getRole());

    // third contributor
    Person third = ebook.getContributors().get(2);
    assertEquals("Rahsin, E. K.", third.getName());
    assertEquals(1966, third.getDeathYear());
    assertEquals("trl", third.getRole());
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment