2010-06-01 16:09:02,338 DEBUG Config: Configuration: 2010-06-01 16:09:02,338 DEBUG Config: pdfimage.inserter.delete.ocr.text=false 2010-06-01 16:09:02,338 DEBUG Config: pdfimage.inserter.delete.tif=false 2010-06-01 16:09:02,338 DEBUG Config: pdfimage.extractor.blank.image=D:/Java/workspaces/2/pdfimage/blank.png 2010-06-01 16:09:02,338 DEBUG Config: test.dir=E:/Download/test 2010-06-01 16:09:02,338 DEBUG Config: pdfimage.extractor.extract.text=true 2010-06-01 16:09:02,338 DEBUG Config: pdfimage.recognizer.pdf2iot1.enabled=true 2010-06-01 16:09:02,401 DEBUG Config: pdfimage.inserter.enabled=false 2010-06-01 16:09:02,401 DEBUG Config: pdfimage.recognizer.ocr.exe=D:/Projects/ocr/Release/ocr.exe 2010-06-01 16:09:02,401 DEBUG Config: pdfimage.recognizer.enabled=true 2010-06-01 16:09:02,401 DEBUG Config: pdfimage.inserter.junk.phrases=D:/Java/workspaces/2/pdfimage/junk_phrases.txt 2010-06-01 16:09:02,401 DEBUG Config: pdfimage.recognizer.pdf2iot2.enabled=true 2010-06-01 16:09:02,401 DEBUG Config: pdfimage.recognizer.img2txt.enabled=true 2010-06-01 16:09:02,401 DEBUG Config: pdfimage.inserter.image.markers=D:/Java/workspaces/2/pdfimage/image_markers.txt 2010-06-01 16:09:02,401 DEBUG Config: pdfimage.inserter.text.match.threshold=0.95 2010-06-01 16:09:02,401 DEBUG Config: pdfimage.inserter.delete.page.text=false 2010-06-01 16:09:02,401 DEBUG Config: pdfimage.extractor.enabled=true 2010-06-01 16:09:02,401 DEBUG Config: Image markers loaded: 2010-06-01 16:09:02,401 DEBUG Config: CHRG: 9 2010-06-01 16:09:02,401 DEBUG Config: FR: 1 2010-06-01 16:09:02,401 DEBUG Config: HMAN: 14 2010-06-01 16:09:02,401 DEBUG Config: STATUTE: 9 2010-06-01 16:09:02,401 DEBUG Config: PPP: 15 2010-06-01 16:09:02,416 INFO Extractor: Extracting from BUDGET-2010-BUD.pdf ... 2010-06-01 16:09:09,031 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getFontWidth(PDType1CFont.java:138) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:323) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,031 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getAverageFontWidth(PDType1CFont.java:242) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:331) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,031 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getFontWidth(PDType1CFont.java:138) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:323) at org.apache.pdfbox.util.operator.ShowText.process(ShowText.java:45) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,031 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getAverageFontWidth(PDType1CFont.java:242) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:331) at org.apache.pdfbox.util.operator.ShowText.process(ShowText.java:45) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,031 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getFontWidth(PDType1CFont.java:138) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:323) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,031 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getAverageFontWidth(PDType1CFont.java:242) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:331) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,031 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getFontWidth(PDType1CFont.java:138) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:323) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,031 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getAverageFontWidth(PDType1CFont.java:242) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:331) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,031 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getFontWidth(PDType1CFont.java:138) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:323) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,046 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getAverageFontWidth(PDType1CFont.java:242) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:331) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,046 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getFontWidth(PDType1CFont.java:138) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:323) at org.apache.pdfbox.util.operator.ShowText.process(ShowText.java:45) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,046 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getAverageFontWidth(PDType1CFont.java:242) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:331) at org.apache.pdfbox.util.operator.ShowText.process(ShowText.java:45) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,046 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getFontWidth(PDType1CFont.java:138) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:323) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,046 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getAverageFontWidth(PDType1CFont.java:242) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:331) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,046 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getFontWidth(PDType1CFont.java:138) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:323) at org.apache.pdfbox.util.operator.ShowText.process(ShowText.java:45) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,046 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getAverageFontWidth(PDType1CFont.java:242) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:331) at org.apache.pdfbox.util.operator.ShowText.process(ShowText.java:45) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,046 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getFontWidth(PDType1CFont.java:138) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:323) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,046 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getAverageFontWidth(PDType1CFont.java:242) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:331) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,046 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getFontWidth(PDType1CFont.java:138) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:323) at org.apache.pdfbox.util.operator.ShowText.process(ShowText.java:45) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,046 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getAverageFontWidth(PDType1CFont.java:242) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:331) at org.apache.pdfbox.util.operator.ShowText.process(ShowText.java:45) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,046 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getFontWidth(PDType1CFont.java:138) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:323) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,046 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getAverageFontWidth(PDType1CFont.java:242) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:331) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,062 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getFontWidth(PDType1CFont.java:138) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:323) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,062 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getAverageFontWidth(PDType1CFont.java:242) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:331) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,062 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getFontWidth(PDType1CFont.java:138) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:323) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,062 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getAverageFontWidth(PDType1CFont.java:242) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:331) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,062 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getFontWidth(PDType1CFont.java:138) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:323) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,062 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getAverageFontWidth(PDType1CFont.java:242) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:331) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,062 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getFontWidth(PDType1CFont.java:138) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:323) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,062 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getAverageFontWidth(PDType1CFont.java:242) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:331) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,062 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getFontWidth(PDType1CFont.java:138) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:323) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,062 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getAverageFontWidth(PDType1CFont.java:242) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:331) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,062 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getFontWidth(PDType1CFont.java:138) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:323) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,078 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getAverageFontWidth(PDType1CFont.java:242) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:331) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,078 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getFontWidth(PDType1CFont.java:138) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:323) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,078 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getAverageFontWidth(PDType1CFont.java:242) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:331) at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:61) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,078 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getFontWidth(PDType1CFont.java:138) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:323) at org.apache.pdfbox.util.operator.ShowText.process(ShowText.java:45) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,078 WARN PDFStreamEngine: java.lang.IllegalArgumentException java.lang.IllegalArgumentException at org.apache.fontbox.cff.CFFParser.readCharset(CFFParser.java:511) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:319) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getAverageFontWidth(PDType1CFont.java:242) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:331) at org.apache.pdfbox.util.operator.ShowText.process(ShowText.java:45) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:139) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-01 16:09:09,109 INFO Extractor: pages processed: 146 2010-06-01 16:09:09,109 INFO Extractor: images extracted: 9 2010-06-01 16:09:09,109 INFO Extractor: text extracted: 16 2010-06-01 16:09:09,109 INFO Extractor: processing time: 6 seconds 2010-06-01 16:09:09,157 INFO Recognizer: Recognizing BUDGET-2010-BUD.pdf ... 2010-06-01 16:09:14,741 INFO Recognizer: img2txt done. 2010-06-01 16:12:38,029 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-01 16:12:50,932 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-01 16:12:50,932 INFO Recognizer: processing time: 221 seconds 2010-06-01 16:12:50,932 INFO Extractor: Extracting from BUDGET-2010-TRANSMITTAL.pdf ... 2010-06-01 16:12:50,964 INFO Extractor: pages processed: 3 2010-06-01 16:12:50,964 INFO Extractor: images extracted: 3 2010-06-01 16:12:50,964 INFO Extractor: text extracted: 3 2010-06-01 16:12:50,964 INFO Extractor: processing time: 0 seconds 2010-06-01 16:12:50,964 INFO Recognizer: Recognizing BUDGET-2010-TRANSMITTAL.pdf ... 2010-06-01 16:12:53,943 INFO Recognizer: img2txt done. 2010-06-01 16:12:56,737 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-01 16:12:59,546 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-01 16:12:59,546 INFO Recognizer: processing time: 8 seconds 2010-06-01 16:12:59,546 INFO Extractor: Extracting from CDIR-2000-10-01-CAPITOL.pdf ... 2010-06-01 16:13:03,712 INFO Extractor: pages processed: 21 2010-06-01 16:13:03,712 INFO Extractor: images extracted: 5 2010-06-01 16:13:03,712 INFO Extractor: text extracted: 11 2010-06-01 16:13:03,712 INFO Extractor: processing time: 4 seconds 2010-06-01 16:13:03,712 INFO Recognizer: Recognizing CDIR-2000-10-01-CAPITOL.pdf ... 2010-06-01 16:13:12,418 INFO Recognizer: img2txt done. 2010-06-01 16:13:45,007 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-01 16:13:52,761 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-01 16:13:52,761 INFO Recognizer: processing time: 49 seconds 2010-06-01 16:13:52,761 INFO Extractor: Extracting from CDIR-2000-10-01-STATEMAP-CA.pdf ... 2010-06-01 16:13:53,511 INFO Extractor: pages processed: 1 2010-06-01 16:13:53,511 INFO Extractor: images extracted: 1 2010-06-01 16:13:53,511 INFO Extractor: text extracted: 1 2010-06-01 16:13:53,511 INFO Extractor: processing time: 0 seconds 2010-06-01 16:13:53,511 INFO Recognizer: Recognizing CDIR-2000-10-01-STATEMAP-CA.pdf ... 2010-06-01 16:13:54,572 INFO Recognizer: img2txt done. 2010-06-01 16:13:55,993 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-01 16:13:57,429 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-01 16:13:57,429 INFO Recognizer: processing time: 3 seconds 2010-06-01 16:13:57,429 INFO Extractor: Extracting from CDIR-2000-10-01-STATEMAP-VI.pdf ... 2010-06-01 16:13:57,867 INFO Extractor: pages processed: 1 2010-06-01 16:13:57,867 INFO Extractor: images extracted: 1 2010-06-01 16:13:57,867 INFO Extractor: text extracted: 1 2010-06-01 16:13:57,867 INFO Extractor: processing time: 0 seconds 2010-06-01 16:13:57,867 INFO Recognizer: Recognizing CDIR-2000-10-01-STATEMAP-VI.pdf ... 2010-06-01 16:13:58,662 INFO Recognizer: img2txt done. 2010-06-01 16:13:59,630 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-01 16:14:00,613 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-01 16:14:00,613 INFO Recognizer: processing time: 2 seconds 2010-06-01 16:14:00,613 INFO Extractor: Extracting from CDIR-2000-10-01.pdf ... 2010-06-01 16:14:44,981 INFO Extractor: pages processed: 1213 2010-06-01 16:14:44,981 INFO Extractor: images extracted: 62 2010-06-01 16:14:44,981 INFO Extractor: text extracted: 73 2010-06-01 16:14:44,981 INFO Extractor: processing time: 44 seconds 2010-06-01 16:14:45,060 INFO Recognizer: Recognizing CDIR-2000-10-01.pdf ... 2010-06-01 16:15:52,842 INFO Recognizer: img2txt done. 2010-06-01 16:54:49,852 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-01 16:56:00,256 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-01 16:56:00,256 INFO Recognizer: processing time: 2475 seconds 2010-06-01 16:56:00,256 INFO Extractor: Extracting from CDOC-111hdoc11.pdf ... 2010-06-01 16:56:04,984 INFO Extractor: pages processed: 1081 2010-06-01 16:56:04,984 INFO Extractor: images extracted: 1079 2010-06-01 16:56:04,984 INFO Extractor: text extracted: 1080 2010-06-01 16:56:04,984 INFO Extractor: processing time: 4 seconds 2010-06-01 16:56:05,094 INFO Recognizer: Recognizing CDOC-111hdoc11.pdf ... 2010-06-01 17:49:39,544 INFO Recognizer: img2txt done. 2010-06-01 19:34:14,648 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-01 21:19:16,385 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-01 21:19:16,385 INFO Recognizer: processing time: 15791 seconds 2010-06-01 21:19:16,385 INFO Extractor: Extracting from CDOC-111hdoc17-pt1.pdf ... 2010-06-01 21:52:35,730 INFO Extractor: pages processed: 1548 2010-06-01 21:52:35,730 INFO Extractor: images extracted: 1546 2010-06-01 21:52:35,730 INFO Extractor: text extracted: 1547 2010-06-01 21:52:35,730 INFO Extractor: processing time: 1999 seconds 2010-06-01 21:52:35,965 INFO Recognizer: Recognizing CDOC-111hdoc17-pt1.pdf ... 2010-06-01 22:41:22,254 INFO Recognizer: img2txt done. 2010-06-01 23:33:16,176 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 00:24:47,123 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 00:24:47,123 INFO Recognizer: processing time: 9131 seconds 2010-06-02 00:24:47,123 INFO Extractor: Extracting from CDOC-111hdoc17-pt2.pdf ... 2010-06-02 00:52:52,428 INFO Extractor: pages processed: 1345 2010-06-02 00:52:52,428 INFO Extractor: images extracted: 1343 2010-06-02 00:52:52,428 INFO Extractor: text extracted: 1344 2010-06-02 00:52:52,428 INFO Extractor: processing time: 1685 seconds 2010-06-02 00:52:52,569 INFO Recognizer: Recognizing CDOC-111hdoc17-pt2.pdf ... 2010-06-02 01:40:52,931 INFO Recognizer: img2txt done. 2010-06-02 02:36:09,451 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 06:21:01,553 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 06:21:01,553 INFO Recognizer: processing time: 19688 seconds 2010-06-02 06:21:01,554 INFO Extractor: Extracting from CDOC-111hdoc7.pdf ... 2010-06-02 06:21:02,918 INFO Extractor: pages processed: 38 2010-06-02 06:21:02,918 INFO Extractor: images extracted: 35 2010-06-02 06:21:02,918 INFO Extractor: text extracted: 36 2010-06-02 06:21:02,919 INFO Extractor: processing time: 1 seconds 2010-06-02 06:21:02,923 INFO Recognizer: Recognizing CDOC-111hdoc7.pdf ... 2010-06-02 06:23:35,279 INFO Recognizer: img2txt done. 2010-06-02 06:26:10,345 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 06:28:42,810 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 06:28:42,811 INFO Recognizer: processing time: 459 seconds 2010-06-02 06:28:42,811 INFO Extractor: Extracting from CFR-2009-title49-vol7.pdf ... 2010-06-02 06:28:54,711 INFO Extractor: pages processed: 666 2010-06-02 06:28:54,711 INFO Extractor: images extracted: 129 2010-06-02 06:28:54,711 INFO Extractor: text extracted: 176 2010-06-02 06:28:54,711 INFO Extractor: processing time: 11 seconds 2010-06-02 06:28:54,770 INFO Recognizer: Recognizing CFR-2009-title49-vol7.pdf ... 2010-06-02 06:32:41,886 INFO Recognizer: img2txt done. 2010-06-02 06:48:04,662 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 06:50:33,285 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 06:50:33,285 INFO Recognizer: processing time: 1298 seconds 2010-06-02 06:50:33,285 INFO Extractor: Extracting from CHRG-106shrg10636166.pdf ... 2010-06-02 06:50:33,499 INFO Extractor: pages processed: 78 2010-06-02 06:50:33,500 INFO Extractor: images extracted: 78 2010-06-02 06:50:33,500 INFO Extractor: text extracted: 78 2010-06-02 06:50:33,500 INFO Extractor: processing time: 0 seconds 2010-06-02 06:50:33,506 INFO Recognizer: Recognizing CHRG-106shrg10636166.pdf ... 2010-06-02 06:52:10,976 INFO Recognizer: img2txt done. 2010-06-02 06:53:38,122 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 06:55:04,306 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 06:55:04,306 INFO Recognizer: processing time: 270 seconds 2010-06-02 06:55:04,306 INFO Extractor: Extracting from CHRG-110hhrg11046861.pdf ... 2010-06-02 06:55:08,316 INFO Extractor: pages processed: 169 2010-06-02 06:55:08,316 INFO Extractor: images extracted: 83 2010-06-02 06:55:08,316 INFO Extractor: text extracted: 97 2010-06-02 06:55:08,316 INFO Extractor: processing time: 4 seconds 2010-06-02 06:55:08,330 INFO Recognizer: Recognizing CHRG-110hhrg11046861.pdf ... 2010-06-02 06:56:52,075 INFO Recognizer: img2txt done. 2010-06-02 07:00:44,671 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 07:02:42,256 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 07:02:42,256 INFO Recognizer: processing time: 453 seconds 2010-06-02 07:02:42,256 INFO Extractor: Extracting from CHRG-111hhrg11147258.pdf ... 2010-06-02 07:02:44,851 INFO Extractor: pages processed: 125 2010-06-02 07:02:44,851 INFO Extractor: images extracted: 75 2010-06-02 07:02:44,851 INFO Extractor: text extracted: 78 2010-06-02 07:02:44,851 INFO Extractor: processing time: 2 seconds 2010-06-02 07:02:44,860 INFO Recognizer: Recognizing CHRG-111hhrg11147258.pdf ... 2010-06-02 07:05:02,233 INFO Recognizer: img2txt done. 2010-06-02 07:08:13,475 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 07:10:31,497 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 07:10:31,497 INFO Recognizer: processing time: 466 seconds 2010-06-02 07:10:31,498 INFO Extractor: Extracting from CHRG-111hhrg11148055.pdf ... 2010-06-02 07:10:32,270 INFO Extractor: pages processed: 74 2010-06-02 07:10:32,270 INFO Extractor: images extracted: 12 2010-06-02 07:10:32,270 INFO Extractor: text extracted: 17 2010-06-02 07:10:32,270 INFO Extractor: processing time: 0 seconds 2010-06-02 07:10:32,278 INFO Recognizer: Recognizing CHRG-111hhrg11148055.pdf ... 2010-06-02 07:10:47,870 INFO Recognizer: img2txt done. 2010-06-02 07:12:34,744 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 07:13:00,638 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 07:13:00,638 INFO Recognizer: processing time: 148 seconds 2010-06-02 07:13:00,638 INFO Extractor: Extracting from CHRG-111hhrg11151898.pdf ... 2010-06-02 07:13:02,517 INFO Extractor: pages processed: 69 2010-06-02 07:13:02,517 INFO Extractor: images extracted: 33 2010-06-02 07:13:02,517 INFO Extractor: text extracted: 46 2010-06-02 07:13:02,517 INFO Extractor: processing time: 1 seconds 2010-06-02 07:13:02,523 INFO Recognizer: Recognizing CHRG-111hhrg11151898.pdf ... 2010-06-02 07:13:56,704 INFO Recognizer: img2txt done. 2010-06-02 07:15:25,658 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 07:16:20,007 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 07:16:20,007 INFO Recognizer: processing time: 197 seconds 2010-06-02 07:16:20,007 INFO Extractor: Extracting from CHRG-111shrg37-pt1.pdf ... 2010-06-02 07:16:20,332 INFO Extractor: pages processed: 58 2010-06-02 07:16:20,332 INFO Extractor: images extracted: 2 2010-06-02 07:16:20,332 INFO Extractor: text extracted: 4 2010-06-02 07:16:20,332 INFO Extractor: processing time: 0 seconds 2010-06-02 07:16:20,336 INFO Recognizer: Recognizing CHRG-111shrg37-pt1.pdf ... 2010-06-02 07:16:23,381 INFO Recognizer: img2txt done. 2010-06-02 07:17:37,368 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 07:17:42,543 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 07:17:42,543 INFO Recognizer: processing time: 82 seconds 2010-06-02 07:17:42,544 INFO Extractor: Extracting from CHRG-111shrg370.pdf ... 2010-06-02 07:17:48,000 INFO Extractor: pages processed: 193 2010-06-02 07:17:48,000 INFO Extractor: images extracted: 161 2010-06-02 07:17:48,000 INFO Extractor: text extracted: 162 2010-06-02 07:17:48,000 INFO Extractor: processing time: 5 seconds 2010-06-02 07:17:48,020 INFO Recognizer: Recognizing CHRG-111shrg370.pdf ... 2010-06-02 07:21:19,131 INFO Recognizer: img2txt done. 2010-06-02 07:25:36,454 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 07:29:15,108 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 07:29:15,108 INFO Recognizer: processing time: 687 seconds 2010-06-02 07:29:15,108 INFO Extractor: Extracting from CPRT-108SPRT90655.pdf ... 2010-06-02 07:29:15,775 INFO Extractor: pages processed: 134 2010-06-02 07:29:15,775 INFO Extractor: images extracted: 10 2010-06-02 07:29:15,775 INFO Extractor: text extracted: 14 2010-06-02 07:29:15,775 INFO Extractor: processing time: 0 seconds 2010-06-02 07:29:15,782 INFO Recognizer: Recognizing CPRT-108SPRT90655.pdf ... 2010-06-02 07:29:24,888 INFO Recognizer: img2txt done. 2010-06-02 07:32:24,765 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 07:32:37,636 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 07:32:37,636 INFO Recognizer: processing time: 201 seconds 2010-06-02 07:32:37,637 INFO Extractor: Extracting from CPRT-110HPRT44807-Part 3.pdf ... 2010-06-02 07:32:40,344 INFO Extractor: pages processed: 29 2010-06-02 07:32:40,344 INFO Extractor: images extracted: 4 2010-06-02 07:32:40,344 INFO Extractor: text extracted: 6 2010-06-02 07:32:40,344 INFO Extractor: processing time: 2 seconds 2010-06-02 07:32:40,347 INFO Recognizer: Recognizing CPRT-110HPRT44807-Part 3.pdf ... 2010-06-02 07:32:46,596 INFO Recognizer: img2txt done. 2010-06-02 07:33:14,361 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 07:33:20,465 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 07:33:20,465 INFO Recognizer: processing time: 40 seconds 2010-06-02 07:33:20,466 INFO Extractor: Extracting from CPRT-110HPRT44807-Part 4.pdf ... 2010-06-02 07:33:29,063 INFO Extractor: pages processed: 518 2010-06-02 07:33:29,063 INFO Extractor: images extracted: 243 2010-06-02 07:33:29,063 INFO Extractor: text extracted: 313 2010-06-02 07:33:29,063 INFO Extractor: processing time: 8 seconds 2010-06-02 07:33:29,102 INFO Recognizer: Recognizing CPRT-110HPRT44807-Part 4.pdf ... 2010-06-02 07:38:57,895 INFO Recognizer: img2txt done. 2010-06-02 07:51:51,364 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 07:57:25,214 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 07:57:25,214 INFO Recognizer: processing time: 1436 seconds 2010-06-02 07:57:25,214 INFO Extractor: Extracting from CPRT-110HPRT44807-Part 5.pdf ... 2010-06-02 07:57:33,355 INFO Extractor: pages processed: 132 2010-06-02 07:57:33,356 INFO Extractor: images extracted: 13 2010-06-02 07:57:33,356 INFO Extractor: text extracted: 14 2010-06-02 07:57:33,356 INFO Extractor: processing time: 8 seconds 2010-06-02 07:57:33,367 INFO Recognizer: Recognizing CPRT-110HPRT44807-Part 5.pdf ... 2010-06-02 07:58:05,244 INFO Recognizer: img2txt done. 2010-06-02 08:00:48,475 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 08:01:20,630 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 08:01:20,630 INFO Recognizer: processing time: 227 seconds 2010-06-02 08:01:20,630 INFO Extractor: Extracting from CPRT-110HPRT44807-Part 6.pdf ... 2010-06-02 08:01:35,297 INFO Extractor: pages processed: 113 2010-06-02 08:01:35,297 INFO Extractor: images extracted: 24 2010-06-02 08:01:35,298 INFO Extractor: text extracted: 25 2010-06-02 08:01:35,298 INFO Extractor: processing time: 14 seconds 2010-06-02 08:01:35,307 INFO Recognizer: Recognizing CPRT-110HPRT44807-Part 6.pdf ... 2010-06-02 08:02:14,375 INFO Recognizer: img2txt done. 2010-06-02 08:05:12,210 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 08:05:49,182 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 08:05:49,183 INFO Recognizer: processing time: 253 seconds 2010-06-02 08:05:49,183 INFO Extractor: Extracting from CPRT-111HPRT54329.pdf ... 2010-06-02 08:06:15,496 INFO Extractor: pages processed: 366 2010-06-02 08:06:15,496 INFO Extractor: images extracted: 285 2010-06-02 08:06:15,496 INFO Extractor: text extracted: 301 2010-06-02 08:06:15,496 INFO Extractor: processing time: 26 seconds 2010-06-02 08:06:15,570 INFO Recognizer: Recognizing CPRT-111HPRT54329.pdf ... 2010-06-02 08:18:42,083 INFO Recognizer: img2txt done. 2010-06-02 08:35:21,084 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 08:50:09,949 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 08:50:09,949 INFO Recognizer: processing time: 2634 seconds 2010-06-02 08:50:09,950 INFO Extractor: Extracting from CPRT-111SPRT47215.pdf ... 2010-06-02 08:50:12,174 INFO Extractor: pages processed: 66 2010-06-02 08:50:12,174 INFO Extractor: images extracted: 2 2010-06-02 08:50:12,174 INFO Extractor: text extracted: 6 2010-06-02 08:50:12,174 INFO Extractor: processing time: 2 seconds 2010-06-02 08:50:12,178 INFO Recognizer: Recognizing CPRT-111SPRT47215.pdf ... 2010-06-02 08:50:14,785 INFO Recognizer: img2txt done. 2010-06-02 08:51:27,502 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 08:51:31,137 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 08:51:31,137 INFO Recognizer: processing time: 78 seconds 2010-06-02 08:51:31,138 INFO Extractor: Extracting from CPRT-111SPRT48170.pdf ... 2010-06-02 08:55:28,122 INFO Extractor: pages processed: 269 2010-06-02 08:55:28,122 INFO Extractor: images extracted: 263 2010-06-02 08:55:28,122 INFO Extractor: text extracted: 264 2010-06-02 08:55:28,122 INFO Extractor: processing time: 236 seconds 2010-06-02 08:55:28,146 INFO Recognizer: Recognizing CPRT-111SPRT48170.pdf ... 2010-06-02 09:02:34,321 INFO Recognizer: img2txt done. 2010-06-02 09:09:50,755 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 09:17:03,463 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 09:17:03,463 INFO Recognizer: processing time: 1295 seconds 2010-06-02 09:17:03,463 INFO Extractor: Extracting from CPRT-111SPRT51207.pdf ... 2010-06-02 09:17:03,607 INFO Extractor: pages processed: 20 2010-06-02 09:17:03,607 INFO Extractor: images extracted: 2 2010-06-02 09:17:03,607 INFO Extractor: text extracted: 3 2010-06-02 09:17:03,607 INFO Extractor: processing time: 0 seconds 2010-06-02 09:17:03,610 INFO Recognizer: Recognizing CPRT-111SPRT51207.pdf ... 2010-06-02 09:17:09,192 INFO Recognizer: img2txt done. 2010-06-02 09:17:31,608 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 09:17:36,501 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 09:17:36,502 INFO Recognizer: processing time: 32 seconds 2010-06-02 09:17:36,502 INFO Extractor: Extracting from CPRT-111SPRT51233.pdf ... 2010-06-02 09:17:36,515 INFO Extractor: pages processed: 22 2010-06-02 09:17:36,515 INFO Extractor: images extracted: 0 2010-06-02 09:17:36,515 INFO Extractor: text extracted: 0 2010-06-02 09:17:36,515 INFO Extractor: processing time: 0 seconds 2010-06-02 09:17:36,519 INFO Recognizer: Recognizing CPRT-111SPRT51233.pdf ... 2010-06-02 09:17:36,519 INFO Recognizer: img2txt done. 2010-06-02 09:17:36,520 DEBUG Recognizer: Copy CPRT-111SPRT51233.pdf to CPRT-111SPRT51233.1.pdf because it has no image. 2010-06-02 09:17:36,520 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 09:17:36,521 DEBUG Recognizer: Copy CPRT-111SPRT51233.pdf to CPRT-111SPRT51233.2.pdf because it has no image. 2010-06-02 09:17:36,521 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 09:17:36,521 INFO Recognizer: processing time: 0 seconds 2010-06-02 09:17:36,521 INFO Extractor: Extracting from CREC-1996-01-22.pdf ... 2010-06-02 09:22:11,681 INFO Extractor: pages processed: 538 2010-06-02 09:22:11,682 INFO Extractor: images extracted: 132 2010-06-02 09:22:11,682 INFO Extractor: text extracted: 168 2010-06-02 09:22:11,682 INFO Extractor: processing time: 275 seconds 2010-06-02 09:22:11,729 INFO Recognizer: Recognizing CREC-1996-01-22.pdf ... 2010-06-02 09:31:37,195 INFO Recognizer: img2txt done. 2010-06-02 10:03:47,355 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 10:14:28,281 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 10:14:28,281 INFO Recognizer: processing time: 3136 seconds 2010-06-02 10:14:28,282 INFO Extractor: Extracting from CRECB-2001-pt20-issue-2001-12-19.pdf ... 2010-06-02 10:26:21,180 INFO Extractor: pages processed: 706 2010-06-02 10:26:21,180 INFO Extractor: images extracted: 252 2010-06-02 10:26:21,180 INFO Extractor: text extracted: 275 2010-06-02 10:26:21,180 INFO Extractor: processing time: 712 seconds 2010-06-02 10:26:21,246 INFO Recognizer: Recognizing CRECB-2001-pt20-issue-2001-12-19.pdf ... 2010-06-02 10:36:27,419 INFO Recognizer: img2txt done. 2010-06-02 11:06:07,197 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 11:12:16,775 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 11:12:16,775 INFO Recognizer: processing time: 2755 seconds 2010-06-02 11:12:16,775 INFO Extractor: Extracting from CRECB-2001-pt20.pdf ... 2010-06-02 11:24:50,512 INFO Extractor: pages processed: 1284 2010-06-02 11:24:50,512 INFO Extractor: images extracted: 262 2010-06-02 11:24:50,512 INFO Extractor: text extracted: 287 2010-06-02 11:24:50,512 INFO Extractor: processing time: 753 seconds 2010-06-02 11:24:50,630 INFO Recognizer: Recognizing CRECB-2001-pt20.pdf ... 2010-06-02 11:35:30,194 INFO Recognizer: img2txt done. 2010-06-02 12:37:37,399 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 12:44:18,097 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 12:44:18,097 INFO Recognizer: processing time: 4767 seconds 2010-06-02 12:44:18,097 INFO Extractor: Extracting from CRPT-108hrpt490-pt1.pdf ... 2010-06-02 12:44:18,609 INFO Extractor: pages processed: 45 2010-06-02 12:44:18,609 INFO Extractor: images extracted: 8 2010-06-02 12:44:18,609 INFO Extractor: text extracted: 12 2010-06-02 12:44:18,609 INFO Extractor: processing time: 0 seconds 2010-06-02 12:44:18,613 INFO Recognizer: Recognizing CRPT-108hrpt490-pt1.pdf ... 2010-06-02 12:44:37,931 INFO Recognizer: img2txt done. 2010-06-02 12:45:39,343 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 12:46:00,313 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 12:46:00,313 INFO Recognizer: processing time: 101 seconds 2010-06-02 12:46:00,314 INFO Extractor: Extracting from CRPT-108hrpt490-pt2.pdf ... 2010-06-02 12:46:00,350 WARN PDFStreamEngine: java.lang.ArrayIndexOutOfBoundsException: 5 java.lang.ArrayIndexOutOfBoundsException: 5 at org.apache.fontbox.cff.CFFParser$IndexData.getBytes(CFFParser.java:585) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:329) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getFontWidth(PDType1CFont.java:138) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:323) at org.apache.pdfbox.util.operator.ShowText.process(ShowText.java:45) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:138) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-02 12:46:00,351 WARN PDFStreamEngine: java.lang.ArrayIndexOutOfBoundsException: 5 java.lang.ArrayIndexOutOfBoundsException: 5 at org.apache.fontbox.cff.CFFParser$IndexData.getBytes(CFFParser.java:585) at org.apache.fontbox.cff.CFFParser.parseFont(CFFParser.java:329) at org.apache.fontbox.cff.CFFParser.parse(CFFParser.java:65) at org.apache.pdfbox.pdmodel.font.PDType1CFont.ensureLoaded(PDType1CFont.java:290) at org.apache.pdfbox.pdmodel.font.PDType1CFont.getAverageFontWidth(PDType1CFont.java:242) at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:331) at org.apache.pdfbox.util.operator.ShowText.process(ShowText.java:45) at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:552) at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:248) at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:207) at org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:367) at org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:291) at org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:247) at pdfimage.Extractor.extractText(Extractor.java:253) at pdfimage.Extractor.extract(Extractor.java:138) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-02 12:46:00,446 INFO Extractor: pages processed: 3 2010-06-02 12:46:00,446 INFO Extractor: images extracted: 1 2010-06-02 12:46:00,446 INFO Extractor: text extracted: 3 2010-06-02 12:46:00,446 INFO Extractor: processing time: 0 seconds 2010-06-02 12:46:00,448 INFO Recognizer: Recognizing CRPT-108hrpt490-pt2.pdf ... 2010-06-02 12:46:02,043 INFO Recognizer: img2txt done. 2010-06-02 12:46:04,721 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 12:46:06,274 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 12:46:06,274 INFO Recognizer: processing time: 5 seconds 2010-06-02 12:46:06,275 INFO Extractor: Extracting from CRPT-110srpt251.pdf ... 2010-06-02 12:46:10,005 INFO Extractor: pages processed: 234 2010-06-02 12:46:10,006 INFO Extractor: images extracted: 54 2010-06-02 12:46:10,006 INFO Extractor: text extracted: 80 2010-06-02 12:46:10,006 INFO Extractor: processing time: 3 seconds 2010-06-02 12:46:10,023 INFO Recognizer: Recognizing CRPT-110srpt251.pdf ... 2010-06-02 12:46:48,169 INFO Recognizer: img2txt done. 2010-06-02 12:54:54,935 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 12:59:39,509 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 12:59:39,509 INFO Recognizer: processing time: 809 seconds 2010-06-02 12:59:39,509 INFO Extractor: Extracting from ECONI-1910-02-Pg1.pdf ... 2010-06-02 12:59:39,588 INFO Extractor: pages processed: 1 2010-06-02 12:59:39,588 INFO Extractor: images extracted: 1 2010-06-02 12:59:39,588 INFO Extractor: text extracted: 1 2010-06-02 12:59:39,588 INFO Extractor: processing time: 0 seconds 2010-06-02 12:59:39,589 INFO Recognizer: Recognizing ECONI-1910-02-Pg1.pdf ... 2010-06-02 12:59:41,617 INFO Recognizer: img2txt done. 2010-06-02 12:59:46,309 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 12:59:51,015 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 12:59:51,015 INFO Recognizer: processing time: 11 seconds 2010-06-02 12:59:51,016 INFO Extractor: Extracting from ECONI-1910-02-Pg37.pdf ... 2010-06-02 12:59:51,094 INFO Extractor: pages processed: 1 2010-06-02 12:59:51,094 INFO Extractor: images extracted: 1 2010-06-02 12:59:51,094 INFO Extractor: text extracted: 1 2010-06-02 12:59:51,094 INFO Extractor: processing time: 0 seconds 2010-06-02 12:59:51,096 INFO Recognizer: Recognizing ECONI-1910-02-Pg37.pdf ... 2010-06-02 12:59:52,726 INFO Recognizer: img2txt done. 2010-06-02 12:59:56,180 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 12:59:59,650 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 12:59:59,650 INFO Recognizer: processing time: 8 seconds 2010-06-02 12:59:59,651 INFO Extractor: Extracting from ECONI-1995-04-Pg1.pdf ... 2010-06-02 13:00:00,085 INFO Extractor: pages processed: 1 2010-06-02 13:00:00,085 INFO Extractor: images extracted: 1 2010-06-02 13:00:00,085 INFO Extractor: text extracted: 1 2010-06-02 13:00:00,085 INFO Extractor: processing time: 0 seconds 2010-06-02 13:00:00,086 INFO Recognizer: Recognizing ECONI-1995-04-Pg1.pdf ... 2010-06-02 13:00:01,858 INFO Recognizer: img2txt done. 2010-06-02 13:00:06,185 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:00:10,577 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:00:10,577 INFO Recognizer: processing time: 10 seconds 2010-06-02 13:00:10,577 INFO Extractor: Extracting from ECONI-1995-04-Pg13.pdf ... 2010-06-02 13:00:11,061 INFO Extractor: pages processed: 1 2010-06-02 13:00:11,062 INFO Extractor: images extracted: 1 2010-06-02 13:00:11,062 INFO Extractor: text extracted: 1 2010-06-02 13:00:11,062 INFO Extractor: processing time: 0 seconds 2010-06-02 13:00:11,063 INFO Recognizer: Recognizing ECONI-1995-04-Pg13.pdf ... 2010-06-02 13:00:14,774 INFO Recognizer: img2txt done. 2010-06-02 13:00:21,376 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:00:27,818 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:00:27,818 INFO Recognizer: processing time: 16 seconds 2010-06-02 13:00:27,819 INFO Extractor: Extracting from ERP-1996-other-2.pdf ... 2010-06-02 13:00:28,182 INFO Extractor: pages processed: 3 2010-06-02 13:00:28,182 INFO Extractor: images extracted: 3 2010-06-02 13:00:28,182 INFO Extractor: text extracted: 2 2010-06-02 13:00:28,182 INFO Extractor: processing time: 0 seconds 2010-06-02 13:00:28,183 INFO Recognizer: Recognizing ERP-1996-other-2.pdf ... 2010-06-02 13:00:29,283 INFO Recognizer: img2txt done. 2010-06-02 13:00:30,623 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:00:31,553 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:00:31,553 INFO Recognizer: processing time: 3 seconds 2010-06-02 13:00:31,554 INFO Extractor: Extracting from ERP-2009-chapter1.pdf ... 2010-06-02 13:01:07,968 INFO Extractor: pages processed: 29 2010-06-02 13:01:07,968 INFO Extractor: images extracted: 10 2010-06-02 13:01:07,968 INFO Extractor: text extracted: 20 2010-06-02 13:01:07,968 INFO Extractor: processing time: 36 seconds 2010-06-02 13:01:07,972 INFO Recognizer: Recognizing ERP-2009-chapter1.pdf ... 2010-06-02 13:01:27,985 INFO Recognizer: img2txt done. 2010-06-02 13:02:02,536 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:02:21,688 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:02:21,688 INFO Recognizer: processing time: 73 seconds 2010-06-02 13:02:21,688 INFO Extractor: Extracting from ERP-2009-chapter2.pdf ... 2010-06-02 13:02:54,300 INFO Extractor: pages processed: 36 2010-06-02 13:02:54,301 INFO Extractor: images extracted: 9 2010-06-02 13:02:54,301 INFO Extractor: text extracted: 20 2010-06-02 13:02:54,301 INFO Extractor: processing time: 32 seconds 2010-06-02 13:02:54,305 INFO Recognizer: Recognizing ERP-2009-chapter2.pdf ... 2010-06-02 13:03:11,437 INFO Recognizer: img2txt done. 2010-06-02 13:03:52,076 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:04:08,819 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:04:08,819 INFO Recognizer: processing time: 74 seconds 2010-06-02 13:04:08,819 INFO Extractor: Extracting from ERP-2009-chapter3.pdf ... 2010-06-02 13:04:27,869 INFO Extractor: pages processed: 30 2010-06-02 13:04:27,869 INFO Extractor: images extracted: 6 2010-06-02 13:04:27,869 INFO Extractor: text extracted: 15 2010-06-02 13:04:27,870 INFO Extractor: processing time: 19 seconds 2010-06-02 13:04:27,876 INFO Recognizer: Recognizing ERP-2009-chapter3.pdf ... 2010-06-02 13:04:38,977 INFO Recognizer: img2txt done. 2010-06-02 13:05:13,597 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:05:25,847 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:05:25,847 INFO Recognizer: processing time: 57 seconds 2010-06-02 13:05:25,847 INFO Extractor: Extracting from ERP-2009-chapter4.pdf ... 2010-06-02 13:05:48,683 INFO Extractor: pages processed: 24 2010-06-02 13:05:48,683 INFO Extractor: images extracted: 5 2010-06-02 13:05:48,683 INFO Extractor: text extracted: 12 2010-06-02 13:05:48,683 INFO Extractor: processing time: 22 seconds 2010-06-02 13:05:48,689 INFO Recognizer: Recognizing ERP-2009-chapter4.pdf ... 2010-06-02 13:05:58,737 INFO Recognizer: img2txt done. 2010-06-02 13:06:25,087 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:06:33,706 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:06:33,706 INFO Recognizer: processing time: 45 seconds 2010-06-02 13:06:33,706 INFO Extractor: Extracting from ERP-2009-chapter5.pdf ... 2010-06-02 13:06:48,454 INFO Extractor: pages processed: 23 2010-06-02 13:06:48,454 INFO Extractor: images extracted: 4 2010-06-02 13:06:48,454 INFO Extractor: text extracted: 11 2010-06-02 13:06:48,455 INFO Extractor: processing time: 14 seconds 2010-06-02 13:06:48,458 INFO Recognizer: Recognizing ERP-2009-chapter5.pdf ... 2010-06-02 13:06:56,077 INFO Recognizer: img2txt done. 2010-06-02 13:07:20,232 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:07:28,302 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:07:28,302 INFO Recognizer: processing time: 39 seconds 2010-06-02 13:07:28,302 INFO Extractor: Extracting from ERP-2009-chapter6.pdf ... 2010-06-02 13:07:42,895 INFO Extractor: pages processed: 21 2010-06-02 13:07:42,895 INFO Extractor: images extracted: 4 2010-06-02 13:07:42,895 INFO Extractor: text extracted: 10 2010-06-02 13:07:42,895 INFO Extractor: processing time: 14 seconds 2010-06-02 13:07:42,900 INFO Recognizer: Recognizing ERP-2009-chapter6.pdf ... 2010-06-02 13:07:50,678 INFO Recognizer: img2txt done. 2010-06-02 13:08:13,820 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:08:21,862 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:08:21,862 INFO Recognizer: processing time: 38 seconds 2010-06-02 13:08:21,863 INFO Extractor: Extracting from ERP-2009-chapter7.pdf ... 2010-06-02 13:08:36,511 INFO Extractor: pages processed: 20 2010-06-02 13:08:36,511 INFO Extractor: images extracted: 4 2010-06-02 13:08:36,511 INFO Extractor: text extracted: 9 2010-06-02 13:08:36,511 INFO Extractor: processing time: 14 seconds 2010-06-02 13:08:36,514 INFO Recognizer: Recognizing ERP-2009-chapter7.pdf ... 2010-06-02 13:08:45,441 INFO Recognizer: img2txt done. 2010-06-02 13:09:08,738 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:09:16,852 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:09:16,852 INFO Recognizer: processing time: 40 seconds 2010-06-02 13:09:16,853 INFO Extractor: Extracting from ERP-2009-chapter8.pdf ... 2010-06-02 13:09:31,006 INFO Extractor: pages processed: 22 2010-06-02 13:09:31,006 INFO Extractor: images extracted: 4 2010-06-02 13:09:31,006 INFO Extractor: text extracted: 10 2010-06-02 13:09:31,006 INFO Extractor: processing time: 14 seconds 2010-06-02 13:09:31,009 INFO Recognizer: Recognizing ERP-2009-chapter8.pdf ... 2010-06-02 13:09:38,644 INFO Recognizer: img2txt done. 2010-06-02 13:10:01,542 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:10:09,404 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:10:09,405 INFO Recognizer: processing time: 38 seconds 2010-06-02 13:10:09,405 INFO Extractor: Extracting from ERP-2009-chapter9.pdf ... 2010-06-02 13:10:23,194 INFO Extractor: pages processed: 22 2010-06-02 13:10:23,195 INFO Extractor: images extracted: 4 2010-06-02 13:10:23,195 INFO Extractor: text extracted: 11 2010-06-02 13:10:23,195 INFO Extractor: processing time: 13 seconds 2010-06-02 13:10:23,197 INFO Recognizer: Recognizing ERP-2009-chapter9.pdf ... 2010-06-02 13:10:30,682 INFO Recognizer: img2txt done. 2010-06-02 13:10:53,178 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:11:00,616 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:11:00,616 INFO Recognizer: processing time: 37 seconds 2010-06-02 13:11:00,617 INFO Extractor: Extracting from ERP-2009-frontmatter.pdf ... 2010-06-02 13:11:00,831 INFO Extractor: pages processed: 15 2010-06-02 13:11:00,831 INFO Extractor: images extracted: 4 2010-06-02 13:11:00,831 INFO Extractor: text extracted: 7 2010-06-02 13:11:00,831 INFO Extractor: processing time: 0 seconds 2010-06-02 13:11:00,834 INFO Recognizer: Recognizing ERP-2009-frontmatter.pdf ... 2010-06-02 13:11:03,661 INFO Recognizer: img2txt done. 2010-06-02 13:11:11,584 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:11:14,530 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:11:14,530 INFO Recognizer: processing time: 13 seconds 2010-06-02 13:11:14,530 INFO Extractor: Extracting from ERP-2009.pdf ... 2010-06-02 13:11:21,007 INFO Extractor: pages processed: 419 2010-06-02 13:11:21,007 INFO Extractor: images extracted: 55 2010-06-02 13:11:21,007 INFO Extractor: text extracted: 127 2010-06-02 13:11:21,007 INFO Extractor: processing time: 6 seconds 2010-06-02 13:11:21,047 INFO Recognizer: Recognizing ERP-2009.pdf ... 2010-06-02 13:12:23,465 INFO Recognizer: img2txt done. 2010-06-02 13:24:58,232 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:26:35,656 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:26:35,656 INFO Recognizer: processing time: 914 seconds 2010-06-02 13:26:35,657 INFO Extractor: Extracting from ERP-2010-chapter10.pdf ... 2010-06-02 13:26:35,730 INFO Extractor: pages processed: 26 2010-06-02 13:26:35,730 INFO Extractor: images extracted: 10 2010-06-02 13:26:35,730 INFO Extractor: text extracted: 3 2010-06-02 13:26:35,730 INFO Extractor: processing time: 0 seconds 2010-06-02 13:26:35,736 INFO Recognizer: Recognizing ERP-2010-chapter10.pdf ... 2010-06-02 13:26:38,367 INFO Recognizer: img2txt done. 2010-06-02 13:27:09,516 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:27:11,562 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:27:11,563 INFO Recognizer: processing time: 35 seconds 2010-06-02 13:27:11,563 INFO Extractor: Extracting from ERP-2010-chapter3.pdf ... 2010-06-02 13:27:11,626 INFO Extractor: pages processed: 31 2010-06-02 13:27:11,626 INFO Extractor: images extracted: 1 2010-06-02 13:27:11,626 INFO Extractor: text extracted: 3 2010-06-02 13:27:11,626 INFO Extractor: processing time: 0 seconds 2010-06-02 13:27:11,630 INFO Recognizer: Recognizing ERP-2010-chapter3.pdf ... 2010-06-02 13:27:11,996 INFO Recognizer: img2txt done. 2010-06-02 13:27:53,444 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:27:55,871 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:27:55,871 INFO Recognizer: processing time: 44 seconds 2010-06-02 13:27:55,872 INFO Extractor: Extracting from ERP-2010-chapter9.pdf ... 2010-06-02 13:27:55,927 INFO Extractor: pages processed: 24 2010-06-02 13:27:55,927 INFO Extractor: images extracted: 1 2010-06-02 13:27:55,927 INFO Extractor: text extracted: 3 2010-06-02 13:27:55,927 INFO Extractor: processing time: 0 seconds 2010-06-02 13:27:55,931 INFO Recognizer: Recognizing ERP-2010-chapter9.pdf ... 2010-06-02 13:27:56,777 INFO Recognizer: img2txt done. 2010-06-02 13:28:28,158 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:28:30,202 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:28:30,202 INFO Recognizer: processing time: 34 seconds 2010-06-02 13:28:30,203 INFO Extractor: Extracting from ERP-2010-frontmatter.pdf ... 2010-06-02 13:28:30,243 INFO Extractor: pages processed: 13 2010-06-02 13:28:30,243 INFO Extractor: images extracted: 6 2010-06-02 13:28:30,243 INFO Extractor: text extracted: 6 2010-06-02 13:28:30,244 INFO Extractor: processing time: 0 seconds 2010-06-02 13:28:30,245 INFO Recognizer: Recognizing ERP-2010-frontmatter.pdf ... 2010-06-02 13:28:35,636 INFO Recognizer: img2txt done. 2010-06-02 13:28:44,938 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:28:47,711 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:28:47,711 INFO Recognizer: processing time: 17 seconds 2010-06-02 13:28:47,711 INFO Extractor: Extracting from FR-1996-02-12.pdf ... 2010-06-02 13:28:53,509 INFO Extractor: pages processed: 243 2010-06-02 13:28:53,509 INFO Extractor: images extracted: 1 2010-06-02 13:28:53,509 INFO Extractor: text extracted: 3 2010-06-02 13:28:53,509 INFO Extractor: processing time: 5 seconds 2010-06-02 13:28:53,529 INFO Recognizer: Recognizing FR-1996-02-12.pdf ... 2010-06-02 13:28:55,904 INFO Recognizer: img2txt done. 2010-06-02 13:37:47,485 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:37:49,553 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:37:49,554 INFO Recognizer: processing time: 536 seconds 2010-06-02 13:37:49,554 INFO Extractor: Extracting from FR-2008-01-29.pdf ... 2010-06-02 13:37:50,199 INFO Extractor: pages processed: 347 2010-06-02 13:37:50,200 INFO Extractor: images extracted: 2 2010-06-02 13:37:50,200 INFO Extractor: text extracted: 6 2010-06-02 13:37:50,200 INFO Extractor: processing time: 0 seconds 2010-06-02 13:37:50,238 INFO Recognizer: Recognizing FR-2008-01-29.pdf ... 2010-06-02 13:37:52,618 INFO Recognizer: img2txt done. 2010-06-02 13:51:40,717 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:51:44,903 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:51:44,903 INFO Recognizer: processing time: 834 seconds 2010-06-02 13:51:44,903 INFO Extractor: Extracting from GAOREPORTS-GAO-08-384.pdf ... 2010-06-02 13:51:45,209 INFO Extractor: pages processed: 32 2010-06-02 13:51:45,209 INFO Extractor: images extracted: 2 2010-06-02 13:51:45,209 INFO Extractor: text extracted: 6 2010-06-02 13:51:45,209 INFO Extractor: processing time: 0 seconds 2010-06-02 13:51:45,233 INFO Recognizer: Recognizing GAOREPORTS-GAO-08-384.pdf ... 2010-06-02 13:51:46,569 INFO Recognizer: img2txt done. 2010-06-02 13:52:29,010 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:52:32,777 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:52:32,777 INFO Recognizer: processing time: 47 seconds 2010-06-02 13:52:32,777 INFO Extractor: Extracting from GAOREPORTS-GAO-08-685T.pdf ... 2010-06-02 13:52:34,036 INFO Extractor: pages processed: 17 2010-06-02 13:52:34,036 INFO Extractor: images extracted: 2 2010-06-02 13:52:34,036 INFO Extractor: text extracted: 4 2010-06-02 13:52:34,036 INFO Extractor: processing time: 1 seconds 2010-06-02 13:52:34,045 INFO Recognizer: Recognizing GAOREPORTS-GAO-08-685T.pdf ... 2010-06-02 13:52:35,398 INFO Recognizer: img2txt done. 2010-06-02 13:53:06,020 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:53:08,508 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:53:08,508 INFO Recognizer: processing time: 34 seconds 2010-06-02 13:53:08,509 INFO Extractor: Extracting from GAOREPORTS-GAO-08-876R.pdf ... 2010-06-02 13:53:09,282 INFO Extractor: pages processed: 24 2010-06-02 13:53:09,282 INFO Extractor: images extracted: 6 2010-06-02 13:53:09,282 INFO Extractor: text extracted: 11 2010-06-02 13:53:09,282 INFO Extractor: processing time: 0 seconds 2010-06-02 13:53:09,304 INFO Recognizer: Recognizing GAOREPORTS-GAO-08-876R.pdf ... 2010-06-02 13:53:17,064 INFO Recognizer: img2txt done. 2010-06-02 13:53:58,566 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:54:10,602 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:54:10,602 INFO Recognizer: processing time: 61 seconds 2010-06-02 13:54:10,602 INFO Extractor: Extracting from PPP-2004-book1-frontmatter-pgiii.pdf ... 2010-06-02 13:54:10,636 WARN Extractor: Fail to extract image 1 on page 3. Use a blank image instead. java.lang.RuntimeException: 3. Unexpected value in file e - please send to IDRsolutions for analysis at org.jpedal.io.PdfReader.handleColorSpaces(Unknown Source) at org.jpedal.io.PdfReader.handleColorSpaces(Unknown Source) at org.jpedal.io.PdfReader.readKeyPairs(Unknown Source) at org.jpedal.io.PdfReader.readDictionaryAsObject(Unknown Source) at org.jpedal.io.PdfReader.convertDirectDictionaryToObject(Unknown Source) at org.jpedal.io.PdfReader.readDictionaryFromRefOrDirect(Unknown Source) at org.jpedal.io.PdfReader.checkResolved(Unknown Source) at org.jpedal.PdfDecoder.decodePage(Unknown Source) at pdfimage.Extractor.writePNGImage(Extractor.java:199) at pdfimage.Extractor.extractImage(Extractor.java:230) at pdfimage.Extractor.extract(Extractor.java:128) at pdfimage.PdfImage.run(PdfImage.java:94) at pdfimage.PdfImage.main(PdfImage.java:114) 2010-06-02 13:54:10,766 INFO Extractor: pages processed: 12 2010-06-02 13:54:10,766 INFO Extractor: images extracted: 1 2010-06-02 13:54:10,766 INFO Extractor: text extracted: 3 2010-06-02 13:54:10,766 INFO Extractor: processing time: 0 seconds 2010-06-02 13:54:10,770 INFO Recognizer: Recognizing PPP-2004-book1-frontmatter-pgiii.pdf ... 2010-06-02 13:54:11,032 INFO Recognizer: img2txt done. 2010-06-02 13:54:19,197 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 13:54:21,838 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 13:54:21,838 INFO Recognizer: processing time: 11 seconds 2010-06-02 13:54:21,839 INFO Extractor: Extracting from STATUTE-117.pdf ... 2010-06-02 13:54:24,200 INFO Extractor: pages processed: 3158 2010-06-02 13:54:24,200 INFO Extractor: images extracted: 14 2010-06-02 13:54:24,201 INFO Extractor: text extracted: 24 2010-06-02 13:54:24,201 INFO Extractor: processing time: 2 seconds 2010-06-02 13:54:24,429 INFO Recognizer: Recognizing STATUTE-117.pdf ... 2010-06-02 13:54:47,427 INFO Recognizer: img2txt done. 2010-06-02 15:45:49,461 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 15:46:23,193 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 15:46:23,193 INFO Recognizer: processing time: 6718 seconds 2010-06-02 15:46:23,194 INFO Extractor: Extracting from STATUTE-118-FrontMatter-Pgi.pdf ... 2010-06-02 15:46:23,258 INFO Extractor: pages processed: 31 2010-06-02 15:46:23,258 INFO Extractor: images extracted: 1 2010-06-02 15:46:23,258 INFO Extractor: text extracted: 2 2010-06-02 15:46:23,258 INFO Extractor: processing time: 0 seconds 2010-06-02 15:46:23,262 INFO Recognizer: Recognizing STATUTE-118-FrontMatter-Pgi.pdf ... 2010-06-02 15:46:23,782 INFO Recognizer: img2txt done. 2010-06-02 15:46:56,988 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 15:46:57,832 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 15:46:57,832 INFO Recognizer: processing time: 34 seconds 2010-06-02 15:46:57,833 INFO Extractor: Extracting from WCPD-1998-02-09-FrontMatter.pdf ... 2010-06-02 15:46:58,622 INFO Extractor: pages processed: 3 2010-06-02 15:46:58,622 INFO Extractor: images extracted: 1 2010-06-02 15:46:58,622 INFO Extractor: text extracted: 2 2010-06-02 15:46:58,622 INFO Extractor: processing time: 0 seconds 2010-06-02 15:46:58,625 INFO Recognizer: Recognizing WCPD-1998-02-09-FrontMatter.pdf ... 2010-06-02 15:46:59,532 INFO Recognizer: img2txt done. 2010-06-02 15:47:02,732 INFO Recognizer: pdf2iot1 (naive) done. 2010-06-02 15:47:03,966 INFO Recognizer: pdf2iot2 (split+merge) done. 2010-06-02 15:47:03,966 INFO Recognizer: processing time: 5 seconds