Commit 6ff4ebb6 authored by Nicolas Peifer

Implement support for negative birth and death dates (#1)

parent b8f9d238
......@@ -26,7 +26,7 @@ public class RdfParser {
// Captures the text between <pgterms:name> and </pgterms:name> as group 1.
private Pattern personNamePattern = Pattern.compile("\\<pgterms\\:name\\>(.*)\\<\\/pgterms\\:name\\>");
// Captures the numeric content of a <pgterms:deathdate ...> element as group 1.
// NOTE(review): the two .compile(...) lines below appear to be the before/after pair of this
// commit's single-line change: the first accepts digits only, the second also accepts an optional
// leading '-'. Only one of them can be present in the real file - presumably the second; confirm.
private Pattern deathYearPattern = Pattern
.compile("\\<pgterms\\:deathdate .*\\>(\\d+)\\<\\/pgterms\\:deathdate\\>");
.compile("\\<pgterms\\:deathdate .*\\>(\\-?\\d+)\\<\\/pgterms\\:deathdate\\>");
// Captures, non-greedily, everything between <marcrel:trl> and </marcrel:trl> as group 1.
// DOTALL lets '.' match line terminators, so the element body may span several lines.
private Pattern translatorPattern = Pattern.compile("\\<marcrel\\:trl\\>(.+?)\\<\\/marcrel\\:trl\\>",
Pattern.DOTALL);
......
......@@ -88,4 +88,11 @@ class RdfParserTest {
assertEquals(1937, ebook.getTranslators().get(0).getDeathYear());
}
/**
 * Parses the fixture that contains a negative death date and checks that the first author's death
 * year is reported with its sign intact.
 */
@Test
void testNegativeDeathYear() {
    // The fixture holds a pgterms:deathdate of -348; the sign must survive parsing.
    Book parsedBook = new RdfParser().parse(TestConstants.RDF_WITH_NEGATIVE_DEATH_YEAR);

    assertEquals(-348, parsedBook.getAuthors().get(0).getDeathYear());
}
}
......@@ -1073,4 +1073,188 @@ public class TestConstants {
+ " </cc:Work>\n" + " <rdf:Description rdf:about=\"http://en.wikipedia.org/wiki/Selma_Lagerlöf\">\n"
+ " <dcterms:description>Wikipedia</dcterms:description>\n" + " </rdf:Description>\n" + "</rdf:RDF>\n"
+ "";
/**
 * RDF/XML record for Project Gutenberg ebook 24899 ("Platons Gastmahl"), kept as a test fixture.
 * It contains an agent (Plato) whose birthdate and deathdate are negative integers (-428 and -348),
 * alongside a translator agent (Kassner, Rudolf) with positive years (1873 and 1959).
 */
public static final String RDF_WITH_NEGATIVE_DEATH_YEAR = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
+ "<rdf:RDF xml:base=\"http://www.gutenberg.org/\"\n"
+ " xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n"
+ " xmlns:dcterms=\"http://purl.org/dc/terms/\"\n"
+ " xmlns:pgterms=\"http://www.gutenberg.org/2009/pgterms/\"\n"
+ " xmlns:cc=\"http://web.resource.org/cc/\"\n"
+ " xmlns:marcrel=\"http://id.loc.gov/vocabulary/relators/\"\n"
+ " xmlns:dcam=\"http://purl.org/dc/dcam/\"\n" + " xmlns:rdfs=\"http://www.w3.org/2000/01/rdf-schema#\"\n"
+ ">\n" + " <pgterms:ebook rdf:about=\"ebooks/24899\">\n" + " <dcterms:hasFormat>\n"
+ " <pgterms:file rdf:about=\"http://www.gutenberg.org/files/24899/24899-h.zip\">\n"
+ " <dcterms:isFormatOf rdf:resource=\"ebooks/24899\"/>\n"
+ " <dcterms:modified rdf:datatype=\"http://www.w3.org/2001/XMLSchema#dateTime\">2008-03-23T11:16:04</dcterms:modified>\n"
+ " <dcterms:format>\n"
+ " <rdf:Description rdf:nodeID=\"Nc515c231f1704346bad636241b7bc125\">\n"
+ " <dcam:memberOf rdf:resource=\"http://purl.org/dc/terms/IMT\"/>\n"
+ " <rdf:value rdf:datatype=\"http://purl.org/dc/terms/IMT\">application/zip</rdf:value>\n"
+ " </rdf:Description>\n" + " </dcterms:format>\n"
+ " <dcterms:extent rdf:datatype=\"http://www.w3.org/2001/XMLSchema#integer\">182848</dcterms:extent>\n"
+ " <dcterms:format>\n"
+ " <rdf:Description rdf:nodeID=\"N89409acea5cb47c583a736644a44f810\">\n"
+ " <dcam:memberOf rdf:resource=\"http://purl.org/dc/terms/IMT\"/>\n"
+ " <rdf:value rdf:datatype=\"http://purl.org/dc/terms/IMT\">text/html; charset=iso-8859-1</rdf:value>\n"
+ " </rdf:Description>\n" + " </dcterms:format>\n" + " </pgterms:file>\n"
+ " </dcterms:hasFormat>\n" + " <marcrel:trl>\n"
+ " <pgterms:agent rdf:about=\"2009/agents/26800\">\n"
+ " <pgterms:birthdate rdf:datatype=\"http://www.w3.org/2001/XMLSchema#integer\">1873</pgterms:birthdate>\n"
+ " <pgterms:name>Kassner, Rudolf</pgterms:name>\n"
+ " <pgterms:webpage rdf:resource=\"http://de.wikipedia.org/wiki/Rudolf_Kassner\"/>\n"
+ " <pgterms:deathdate rdf:datatype=\"http://www.w3.org/2001/XMLSchema#integer\">1959</pgterms:deathdate>\n"
+ " </pgterms:agent>\n" + " </marcrel:trl>\n"
+ " <dcterms:title>Platons Gastmahl</dcterms:title>\n" + " <pgterms:bookshelf>\n"
+ " <rdf:Description rdf:nodeID=\"N3954ea5c2b874d999864c5fee9278b04\">\n"
+ " <dcam:memberOf rdf:resource=\"2009/pgterms/Bookshelf\"/>\n"
+ " <rdf:value>DE Sachbuch</rdf:value>\n" + " </rdf:Description>\n"
+ " </pgterms:bookshelf>\n" + " <dcterms:type>\n"
+ " <rdf:Description rdf:nodeID=\"N88a927c5c1bf494e8c48a555b5899455\">\n"
+ " <rdf:value>Text</rdf:value>\n"
+ " <dcam:memberOf rdf:resource=\"http://purl.org/dc/terms/DCMIType\"/>\n"
+ " </rdf:Description>\n" + " </dcterms:type>\n" + " <dcterms:hasFormat>\n"
+ " <pgterms:file rdf:about=\"http://www.gutenberg.org/ebooks/24899.kindle.noimages\">\n"
+ " <dcterms:extent rdf:datatype=\"http://www.w3.org/2001/XMLSchema#integer\">269224</dcterms:extent>\n"
+ " <dcterms:format>\n"
+ " <rdf:Description rdf:nodeID=\"N5dd22e6431c8451496cee37ad3c7d3ec\">\n"
+ " <dcam:memberOf rdf:resource=\"http://purl.org/dc/terms/IMT\"/>\n"
+ " <rdf:value rdf:datatype=\"http://purl.org/dc/terms/IMT\">application/x-mobipocket-ebook</rdf:value>\n"
+ " </rdf:Description>\n" + " </dcterms:format>\n"
+ " <dcterms:isFormatOf rdf:resource=\"ebooks/24899\"/>\n"
+ " <dcterms:modified rdf:datatype=\"http://www.w3.org/2001/XMLSchema#dateTime\">2018-10-12T03:22:07.853318</dcterms:modified>\n"
+ " </pgterms:file>\n" + " </dcterms:hasFormat>\n" + " <dcterms:hasFormat>\n"
+ " <pgterms:file rdf:about=\"http://www.gutenberg.org/ebooks/24899.rdf\">\n"
+ " <dcterms:extent rdf:datatype=\"http://www.w3.org/2001/XMLSchema#integer\">13760</dcterms:extent>\n"
+ " <dcterms:modified rdf:datatype=\"http://www.w3.org/2001/XMLSchema#dateTime\">2019-04-04T05:20:27.399199</dcterms:modified>\n"
+ " <dcterms:format>\n"
+ " <rdf:Description rdf:nodeID=\"Na77d5ab60d2b4a59a1b732d8a6cff6a2\">\n"
+ " <rdf:value rdf:datatype=\"http://purl.org/dc/terms/IMT\">application/rdf+xml</rdf:value>\n"
+ " <dcam:memberOf rdf:resource=\"http://purl.org/dc/terms/IMT\"/>\n"
+ " </rdf:Description>\n" + " </dcterms:format>\n"
+ " <dcterms:isFormatOf rdf:resource=\"ebooks/24899\"/>\n" + " </pgterms:file>\n"
+ " </dcterms:hasFormat>\n" + " <dcterms:hasFormat>\n"
+ " <pgterms:file rdf:about=\"http://www.gutenberg.org/files/24899/24899-h/24899-h.htm\">\n"
+ " <dcterms:isFormatOf rdf:resource=\"ebooks/24899\"/>\n"
+ " <dcterms:extent rdf:datatype=\"http://www.w3.org/2001/XMLSchema#integer\">164642</dcterms:extent>\n"
+ " <dcterms:modified rdf:datatype=\"http://www.w3.org/2001/XMLSchema#dateTime\">2008-03-23T11:15:50</dcterms:modified>\n"
+ " <dcterms:format>\n"
+ " <rdf:Description rdf:nodeID=\"N6f19d37a76c24478ba99c3bd979bbb6a\">\n"
+ " <dcam:memberOf rdf:resource=\"http://purl.org/dc/terms/IMT\"/>\n"
+ " <rdf:value rdf:datatype=\"http://purl.org/dc/terms/IMT\">text/html; charset=iso-8859-1</rdf:value>\n"
+ " </rdf:Description>\n" + " </dcterms:format>\n" + " </pgterms:file>\n"
+ " </dcterms:hasFormat>\n" + " <dcterms:rights>Public domain in the USA.</dcterms:rights>\n"
+ " <pgterms:downloads rdf:datatype=\"http://www.w3.org/2001/XMLSchema#integer\">22</pgterms:downloads>\n"
+ " <dcterms:publisher>Project Gutenberg</dcterms:publisher>\n" + " <dcterms:subject>\n"
+ " <rdf:Description rdf:nodeID=\"N786b8b8458404cf881fec3870aef162c\">\n"
+ " <rdf:value>Plato</rdf:value>\n"
+ " <dcam:memberOf rdf:resource=\"http://purl.org/dc/terms/LCSH\"/>\n" + " </rdf:Description>\n"
+ " </dcterms:subject>\n" + " <dcterms:license rdf:resource=\"license\"/>\n"
+ " <dcterms:hasFormat>\n"
+ " <pgterms:file rdf:about=\"http://www.gutenberg.org/files/24899/24899-8.zip\">\n"
+ " <dcterms:isFormatOf rdf:resource=\"ebooks/24899\"/>\n"
+ " <dcterms:modified rdf:datatype=\"http://www.w3.org/2001/XMLSchema#dateTime\">2008-03-23T11:16:02</dcterms:modified>\n"
+ " <dcterms:format>\n"
+ " <rdf:Description rdf:nodeID=\"Nc755c08b652a4a23ad1c5cda8c71bdc0\">\n"
+ " <rdf:value rdf:datatype=\"http://purl.org/dc/terms/IMT\">application/zip</rdf:value>\n"
+ " <dcam:memberOf rdf:resource=\"http://purl.org/dc/terms/IMT\"/>\n"
+ " </rdf:Description>\n" + " </dcterms:format>\n" + " <dcterms:format>\n"
+ " <rdf:Description rdf:nodeID=\"N23f554c1174f4a3b8a1719cb333b2998\">\n"
+ " <dcam:memberOf rdf:resource=\"http://purl.org/dc/terms/IMT\"/>\n"
+ " <rdf:value rdf:datatype=\"http://purl.org/dc/terms/IMT\">text/plain; charset=iso-8859-1</rdf:value>\n"
+ " </rdf:Description>\n" + " </dcterms:format>\n"
+ " <dcterms:extent rdf:datatype=\"http://www.w3.org/2001/XMLSchema#integer\">56677</dcterms:extent>\n"
+ " </pgterms:file>\n" + " </dcterms:hasFormat>\n" + " <dcterms:hasFormat>\n"
+ " <pgterms:file rdf:about=\"http://www.gutenberg.org/ebooks/24899.epub.noimages\">\n"
+ " <dcterms:modified rdf:datatype=\"http://www.w3.org/2001/XMLSchema#dateTime\">2018-10-12T03:22:06.574342</dcterms:modified>\n"
+ " <dcterms:format>\n"
+ " <rdf:Description rdf:nodeID=\"N5e58fa451a76405f8236bf64cf274d6e\">\n"
+ " <dcam:memberOf rdf:resource=\"http://purl.org/dc/terms/IMT\"/>\n"
+ " <rdf:value rdf:datatype=\"http://purl.org/dc/terms/IMT\">application/epub+zip</rdf:value>\n"
+ " </rdf:Description>\n" + " </dcterms:format>\n"
+ " <dcterms:isFormatOf rdf:resource=\"ebooks/24899\"/>\n"
+ " <dcterms:extent rdf:datatype=\"http://www.w3.org/2001/XMLSchema#integer\">64102</dcterms:extent>\n"
+ " </pgterms:file>\n" + " </dcterms:hasFormat>\n" + " <dcterms:hasFormat>\n"
+ " <pgterms:file rdf:about=\"http://www.gutenberg.org/ebooks/24899.kindle.images\">\n"
+ " <dcterms:modified rdf:datatype=\"http://www.w3.org/2001/XMLSchema#dateTime\">2018-10-12T03:22:07.302325</dcterms:modified>\n"
+ " <dcterms:extent rdf:datatype=\"http://www.w3.org/2001/XMLSchema#integer\">498479</dcterms:extent>\n"
+ " <dcterms:format>\n"
+ " <rdf:Description rdf:nodeID=\"Nc4422650902a4850ad7c7e5ab4c9d471\">\n"
+ " <rdf:value rdf:datatype=\"http://purl.org/dc/terms/IMT\">application/x-mobipocket-ebook</rdf:value>\n"
+ " <dcam:memberOf rdf:resource=\"http://purl.org/dc/terms/IMT\"/>\n"
+ " </rdf:Description>\n" + " </dcterms:format>\n"
+ " <dcterms:isFormatOf rdf:resource=\"ebooks/24899\"/>\n" + " </pgterms:file>\n"
+ " </dcterms:hasFormat>\n" + " <dcterms:hasFormat>\n"
+ " <pgterms:file rdf:about=\"http://www.gutenberg.org/files/24899/24899-8.txt\">\n"
+ " <dcterms:modified rdf:datatype=\"http://www.w3.org/2001/XMLSchema#dateTime\">2008-03-23T11:16:00</dcterms:modified>\n"
+ " <dcterms:isFormatOf rdf:resource=\"ebooks/24899\"/>\n" + " <dcterms:format>\n"
+ " <rdf:Description rdf:nodeID=\"N43bc75ae12ba4991b94ce5dc71e2612a\">\n"
+ " <rdf:value rdf:datatype=\"http://purl.org/dc/terms/IMT\">text/plain; charset=iso-8859-1</rdf:value>\n"
+ " <dcam:memberOf rdf:resource=\"http://purl.org/dc/terms/IMT\"/>\n"
+ " </rdf:Description>\n" + " </dcterms:format>\n"
+ " <dcterms:extent rdf:datatype=\"http://www.w3.org/2001/XMLSchema#integer\">149764</dcterms:extent>\n"
+ " </pgterms:file>\n" + " </dcterms:hasFormat>\n" + " <dcterms:hasFormat>\n"
+ " <pgterms:file rdf:about=\"http://www.gutenberg.org/ebooks/24899.epub.images\">\n"
+ " <dcterms:isFormatOf rdf:resource=\"ebooks/24899\"/>\n"
+ " <dcterms:extent rdf:datatype=\"http://www.w3.org/2001/XMLSchema#integer\">186729</dcterms:extent>\n"
+ " <dcterms:format>\n"
+ " <rdf:Description rdf:nodeID=\"N728a7c3df0624dc3ac7ff3ae83d81e06\">\n"
+ " <dcam:memberOf rdf:resource=\"http://purl.org/dc/terms/IMT\"/>\n"
+ " <rdf:value rdf:datatype=\"http://purl.org/dc/terms/IMT\">application/epub+zip</rdf:value>\n"
+ " </rdf:Description>\n" + " </dcterms:format>\n"
+ " <dcterms:modified rdf:datatype=\"http://www.w3.org/2001/XMLSchema#dateTime\">2018-10-12T03:22:06.384352</dcterms:modified>\n"
+ " </pgterms:file>\n" + " </dcterms:hasFormat>\n" + " <dcterms:language>\n"
+ " <rdf:Description rdf:nodeID=\"Na3bfed5e7b76420092bcb2c6cf5d98ed\">\n"
+ " <rdf:value rdf:datatype=\"http://purl.org/dc/terms/RFC4646\">de</rdf:value>\n"
+ " </rdf:Description>\n" + " </dcterms:language>\n" + " <dcterms:subject>\n"
+ " <rdf:Description rdf:nodeID=\"Na8289c38aa354bada416ce4fae10d62f\">\n"
+ " <dcam:memberOf rdf:resource=\"http://purl.org/dc/terms/LCSH\"/>\n"
+ " <rdf:value>Classical literature</rdf:value>\n" + " </rdf:Description>\n"
+ " </dcterms:subject>\n" + " <dcterms:subject>\n"
+ " <rdf:Description rdf:nodeID=\"N4b71b6145b704fc0a04880a62b5c22a4\">\n"
+ " <dcam:memberOf rdf:resource=\"http://purl.org/dc/terms/LCC\"/>\n"
+ " <rdf:value>BD</rdf:value>\n" + " </rdf:Description>\n" + " </dcterms:subject>\n"
// The agent below (name "Plato") is the one carrying the negative birthdate/deathdate values.
+ " <dcterms:creator>\n" + " <pgterms:agent rdf:about=\"2009/agents/93\">\n"
+ " <pgterms:birthdate rdf:datatype=\"http://www.w3.org/2001/XMLSchema#integer\">-428</pgterms:birthdate>\n"
+ " <pgterms:webpage rdf:resource=\"http://en.wikipedia.org/wiki/Plato\"/>\n"
+ " <pgterms:name>Plato</pgterms:name>\n"
+ " <pgterms:deathdate rdf:datatype=\"http://www.w3.org/2001/XMLSchema#integer\">-348</pgterms:deathdate>\n"
+ " <pgterms:alias>Πλάτων</pgterms:alias>\n" + " </pgterms:agent>\n"
+ " </dcterms:creator>\n" + " <dcterms:hasFormat>\n"
+ " <pgterms:file rdf:about=\"http://www.gutenberg.org/files/24899/24899-0.txt\">\n"
+ " <dcterms:isFormatOf rdf:resource=\"ebooks/24899\"/>\n"
+ " <dcterms:modified rdf:datatype=\"http://www.w3.org/2001/XMLSchema#dateTime\">2008-03-23T11:15:48</dcterms:modified>\n"
+ " <dcterms:extent rdf:datatype=\"http://www.w3.org/2001/XMLSchema#integer\">152794</dcterms:extent>\n"
+ " <dcterms:format>\n"
+ " <rdf:Description rdf:nodeID=\"Nfff7300b9b7b47f7939e70fb9072511b\">\n"
+ " <dcam:memberOf rdf:resource=\"http://purl.org/dc/terms/IMT\"/>\n"
+ " <rdf:value rdf:datatype=\"http://purl.org/dc/terms/IMT\">text/plain; charset=utf-8</rdf:value>\n"
+ " </rdf:Description>\n" + " </dcterms:format>\n" + " </pgterms:file>\n"
+ " </dcterms:hasFormat>\n" + " <dcterms:hasFormat>\n"
+ " <pgterms:file rdf:about=\"http://www.gutenberg.org/files/24899/24899-0.zip\">\n"
+ " <dcterms:format>\n"
+ " <rdf:Description rdf:nodeID=\"N0918c9a5b56946e88c42f1d72f5014a1\">\n"
+ " <dcam:memberOf rdf:resource=\"http://purl.org/dc/terms/IMT\"/>\n"
+ " <rdf:value rdf:datatype=\"http://purl.org/dc/terms/IMT\">text/plain; charset=utf-8</rdf:value>\n"
+ " </rdf:Description>\n" + " </dcterms:format>\n" + " <dcterms:format>\n"
+ " <rdf:Description rdf:nodeID=\"N1aabc61b22434cd1902bd8c9e6f933c7\">\n"
+ " <rdf:value rdf:datatype=\"http://purl.org/dc/terms/IMT\">application/zip</rdf:value>\n"
+ " <dcam:memberOf rdf:resource=\"http://purl.org/dc/terms/IMT\"/>\n"
+ " </rdf:Description>\n" + " </dcterms:format>\n"
+ " <dcterms:modified rdf:datatype=\"http://www.w3.org/2001/XMLSchema#dateTime\">2008-03-23T11:16:02</dcterms:modified>\n"
+ " <dcterms:isFormatOf rdf:resource=\"ebooks/24899\"/>\n"
+ " <dcterms:extent rdf:datatype=\"http://www.w3.org/2001/XMLSchema#integer\">57081</dcterms:extent>\n"
+ " </pgterms:file>\n" + " </dcterms:hasFormat>\n"
+ " <dcterms:issued rdf:datatype=\"http://www.w3.org/2001/XMLSchema#date\">2008-03-23</dcterms:issued>\n"
+ " <dcterms:subject>\n" + " <rdf:Description rdf:nodeID=\"N390ca0617cdd4b179512b6db4f6b4c3a\">\n"
+ " <rdf:value>Philosophy</rdf:value>\n"
+ " <dcam:memberOf rdf:resource=\"http://purl.org/dc/terms/LCSH\"/>\n" + " </rdf:Description>\n"
+ " </dcterms:subject>\n" + " </pgterms:ebook>\n" + " <cc:Work rdf:about=\"\">\n"
+ " <cc:license rdf:resource=\"https://creativecommons.org/publicdomain/zero/1.0/\"/>\n"
+ " <rdfs:comment>Archives containing the RDF files for *all* our books can be downloaded at\n"
+ " http://www.gutenberg.org/wiki/Gutenberg:Feeds#The_Complete_Project_Gutenberg_Catalog</rdfs:comment>\n"
+ " </cc:Work>\n" + " <rdf:Description rdf:about=\"http://en.wikipedia.org/wiki/Plato\">\n"
+ " <dcterms:description>Wikipedia</dcterms:description>\n" + " </rdf:Description>\n"
+ " <rdf:Description rdf:about=\"http://de.wikipedia.org/wiki/Rudolf_Kassner\">\n"
+ " <dcterms:description>de.wikipedia</dcterms:description>\n" + " </rdf:Description>\n"
+ "</rdf:RDF>\n" + "";
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment