I have this method that creates book objects. I pass it a list of strings (60,000+ entries), each containing the information for one book; the method extracts that information, builds the book object, and adds it to a global list.
This solution works, but it is not very memory-efficient and it is slow, because of the replacements and regex matching performed on every iteration and the many intermediate strings they create. So, can I improve my regexes, and is there a better way to perform many replacements per loop iteration?
private static void defineBooks(List<String> paragraphs) {
// Pattern id = Pattern.compile("[^\\s]\\d+");
Pattern title = Pattern.compile("[\\S\\s]+");
Pattern author = Pattern.compile(", by(.*)(?=\\s*)");
Pattern subtitle = Pattern.compile("\\[Subtitle: [^\\[\\]]+]");
Pattern language = Pattern.compile("\\[Language: [^\\[\\]]+]");
Pattern contents = Pattern.compile("\\[Contents: [^\\[\\]]+]");
Pattern tempx = Pattern.compile("\\d{1,5}$");
Pattern tempy = Pattern.compile("^\\d{1,5}|\\d{1,5}[A-Z]?\\s?$");
Pattern i = Pattern.compile("^, by ");
Pattern j = Pattern.compile("\\s+(?<= )\\d+(\\w)?$|(?<= )\\d+(\\w)?\\s+$|(?<=\\D)\\d+(\\w)?$");
for (int i1 = 0, paragraphsSize = paragraphs.size(); i1 < paragraphsSize; i1++) {
String para = paragraphs.get(i1);
Matcher mid = tempx.matcher(para.replaceAll("\n.+", ""));
Matcher midy = tempy.matcher(para.replaceAll("\n.+", ""));
Matcher mtitle = title.matcher(para);
Matcher mauthor = author.matcher(para);
Matcher msubtitle = subtitle.matcher(para);
Matcher mlanguage = language.matcher(para);
Matcher mcontents = contents.matcher(para);
Book book = new Book();
Matcher temp = Pattern.compile("@\\d{1,5}[A-Z]?@?")
.matcher(para.replaceAll("\\s{2,}", "@"));
if (temp.find()) {
book.setId(temp.group().replaceAll("@", ""));
} else if (mid.find()) {
book.setId(mid.group().trim());
} else if (midy.find()) {
book.setId(midy.group().trim());
}
if (mtitle.find()) {
book.setTitle(para.replaceAll("\\[[^\\[]+(?:])", "")
.replaceAll("(Passed | by|, by).*+", "")
.replaceAll("\\s{2,}\\d{1,5}(\\s)?", "")
.replaceAll("\n", " ")
.replaceAll(",$", "")
.replaceAll("\\s+", " ").trim());
}
if (mauthor.find()) {
String bauthor = i.matcher(mauthor.group()).replaceAll("");
book.setAuthor(j.matcher(bauthor).replaceAll(""));
}
if (msubtitle.find()) {
book.setSubtitle(msubtitle.group()
.replaceAll("\\[Subtitle: ", "")
.replaceAll("]", "")
.replaceAll("\\s{2,}", " "));
} else {
book.setSubtitle("");
}
if (mcontents.find()) {
book.setContents(mcontents.group()
.replaceAll("\\[Contents: ", "")
.replaceAll("]", "")
.replaceAll("\\s{2,}", " "));
} else {
book.setContents("");
}
if (mlanguage.find()) {
book.setLanguage(mlanguage.group()
.replaceAll("\\[Language: ", "")
.replaceAll("]", "")
.replaceAll("\\s{2,}", " "));
} else {
book.setLanguage("English");
}
if (!Objects.equals(book.getId(), "")) {
books.add(book);
}
}
Book
/**
 * Mutable value holder for one catalogue entry.
 *
 * <p>Every field is {@code null} until its setter is called; the class performs no validation.
 */
public class Book {
    private String id;
    private String title;
    private String author;
    private String subtitle;
    private String contents;
    private String language;

    /** @return the book ID, or {@code null} if none has been set */
    public String getId() {
        return id;
    }

    /** @param id the book ID */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the title, or {@code null} if none has been set */
    public String getTitle() {
        return title;
    }

    /** @param title the title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the author, or {@code null} if none has been set */
    public String getAuthor() {
        return author;
    }

    /** @param author the author */
    public void setAuthor(String author) {
        this.author = author;
    }

    /** @return the subtitle, or {@code null} if none has been set */
    public String getSubtitle() {
        return subtitle;
    }

    /** @param subtitle the subtitle */
    public void setSubtitle(String subtitle) {
        this.subtitle = subtitle;
    }

    /** @return the contents description, or {@code null} if none has been set */
    public String getContents() {
        return contents;
    }

    /** @param contents the contents description */
    public void setContents(String contents) {
        this.contents = contents;
    }

    /** @return the language, or {@code null} if none has been set */
    public String getLanguage() {
        return language;
    }

    /** @param language the language */
    public void setLanguage(String language) {
        this.language = language;
    }
}
```