我遇到的错误与“从 Gmail 获取 PDF 附件并转换为文本时出错(ERROR)”这个问题类似,但那里的解决方法在我的情况下不起作用。我已经启用了 Drive API,文件扩展名为 .pdf,而且该脚本对其他文档(同样是 PDF)可以正常工作。我是按照“从 Gmail 中以文本形式获取 PDF 附件”中的方法把 PDF 读取为文本的。
请帮忙。
这是我的代码:
/**
 * Searches the inbox for threads newer than six days and passes the first
 * message of every thread whose subject is exactly 'subject' to Messages().
 *
 * Threads are visited in reverse of the order returned by GmailApp.search.
 * Only the subject is read here: sender, recipient, date, body and
 * attachments were previously fetched for every thread and never used, so
 * those Gmail reads are left to the handler that actually needs them.
 */
function searchEmails() {
  var threads = GmailApp.search('in:inbox newer_than:6d');
  // An empty result simply skips the loop, so no separate length guard is needed.
  for (var t = threads.length - 1; t >= 0; t--) {
    var message = threads[t].getMessages()[0];
    if (message.getSubject() === 'subject') {
      Messages(message);
    }
  }
}
/**
 * Reads the first attachment of a Gmail message as a PDF, converts it to text
 * with pdfToText(), and picks individual values out of that text by position.
 *
 * The remainder of the function is elided in this excerpt ("rest of my code").
 *
 * @param {GmailMessage} message - message whose first attachment is processed.
 */
function Messages(message) {
var attachments = message.getAttachments();
// NOTE(review): only attachments[0] is used. If the message has no attachment
// this throws, and if the first attachment is not the PDF the conversion is
// attempted on the wrong file - confirm which attachment is expected.
var blob = attachments[0].getAs(MimeType.PDF);
// The HTML body is fetched but not used in the visible part of the function.
var body = message.getBody();
var filetext = pdfToText(blob);
// Drop everything before the "Title:" marker. When the marker is missing,
// search() returns -1 and substr(-1) keeps only the last character.
filetext = filetext.substr(filetext.search("Title:"));
// Split on single spaces; the fields below are addressed by token position.
filetext = filetext.split(' ');
// NOTE(review): indexes 12 and 6 presumably match one specific PDF layout -
// verify against a sample document.
var msgValue = filetext[12];
var msgDate = filetext[6];
var msgID = message.getId();
// rest of my code
/**
 * Extracts the text of a PDF by inserting it into Drive with OCR enabled,
 * reading the body text of the resulting document, and removing that
 * document again unless the caller asks to keep it.
 *
 * @param {Blob} blob - PDF content; its name and content type are read.
 * @param {Object} [options]
 * @param {string} [options.path] - Drive folder path for the temporary
 *     document, resolved with getDriveFolderFromPath().
 * @param {string} [options.ocrLanguage] - OCR language code; defaults to 'pl'.
 * @param {boolean} [options.keepGdoc] - when truthy, the converted document
 *     is left in Drive instead of being removed.
 * @returns {string} Text of the converted document.
 * @throws Whatever Drive.Files.insert, DocumentApp.openById or
 *     Drive.Files.remove throw; the temporary document is still removed when
 *     reading it fails.
 */
function pdfToText (blob, options) {
  options = options || {};
  var parents = [];
  if (options.path) {
    // NOTE(review): the folder object (or null when the path does not
    // resolve) is passed through as-is - confirm the shape Drive expects.
    parents.push(getDriveFolderFromPath(options.path));
  }
  var resource = {
    // Name the converted document after the PDF, swapping the trailing "pdf".
    title: blob.getName().replace(/pdf$/, 'gdoc'),
    mimeType: blob.getContentType(),
    parents: parents
  };
  var insertOpts = {
    ocr: true,
    ocrLanguage: options.ocrLanguage || 'pl'
  };
  // Save PDF as GDOC
  var gdocFile = Drive.Files.insert(resource, blob, insertOpts);
  try {
    // Get text from GDOC
    return DocumentApp.openById(gdocFile.id).getBody().getText();
  } finally {
    // Clean up even when opening or reading the document throws, so a failed
    // conversion does not leave a stray file in Drive.
    if (!options.keepGdoc) {
      Drive.Files.remove(gdocFile.id);
    }
  }
}
/**
 * Resolves a slash-separated path to a Drive folder, starting at the root
 * folder.
 *
 * Empty segments (leading, trailing or doubled slashes) are ignored. A missing
 * or empty path resolves to the root folder itself.
 *
 * @param {string} [path] - folder path such as "/a/b".
 * @returns {Folder|null} The folder at the path, or null when any segment
 *     does not match a child folder.
 */
function getDriveFolderFromPath (path) {
  var names = (path || "/").split("/");
  var folder = DriveApp.getRootFolder();
  for (var i = 0; i < names.length; i++) {
    var name = names[i];
    if (!name) {
      // Empty segment: stay where we are.
      continue;
    }
    if (!folder) {
      // An earlier segment already failed to resolve.
      return null;
    }
    var matches = folder.getFoldersByName(name);
    folder = matches.hasNext() ? matches.next() : null;
  }
  return folder;
}
首先,我最初的代码本身是可以运行的。但是,如果电子邮件中包含多个附件,就会遇到我最初遇到的那个问题。解决方案如下:
/**
 * Searches the inbox for threads newer than six days, walks every message and
 * every attachment, and converts each PDF attachment to text with pdfToText()
 * before picking values out of that text by position.
 *
 * The remainder of the function is elided in this excerpt ("rest of my code").
 */
function searchEmails() {
var threads = GmailApp.search('in:inbox newer_than:6d');
if (threads.length > 0) {
// NOTE(review): t is not used inside the visible loop body and
// getMessagesForThreads(threads) is called with the full list on every
// iteration, so all threads appear to be processed threads.length times -
// confirm whether the outer loop is intended.
for (var t=threads.length-1; t>=0; t--) {
// msgs is indexed as msgs[thread][message] below.
var msgs = GmailApp.getMessagesForThreads(threads);
for (var i = 0 ; i < msgs.length; i++) {
for (var j = 0; j < msgs[i].length; j++) {
var attachments = msgs[i][j].getAttachments();
for (var k = 0; k < attachments.length; k++) {
var content = attachments[k].getContentType();
Logger.log(attachments[k].getName()) //check file extension
// Only attachments reported as PDF are converted; others are skipped.
if (content == 'application/pdf'){
/*
you can check if the attachment has the expected name
var attachmentsName = attachments[k].getName();
if (attachmentsName == 'looking name'){
*/
var blob = attachments[k].getAs(MimeType.PDF);
var filetext = pdfToText(blob);
// Drop everything before the "SZCZEGÓŁY" marker. When the marker is missing,
// search() returns -1 and substr(-1) keeps only the last character.
filetext = filetext.substr(filetext.search("SZCZEGÓŁY"));
// Split on single spaces; the fields below are addressed by token position.
filetext = filetext.split(' ');
// NOTE(review): the indexes presumably match one specific PDF layout -
// verify against a sample document.
var msgValue = filetext[14] + filetext[15];
var msgDate = filetext[6];
var type = filetext[3];
// rest of my code
/**
 * Extracts the text of a PDF by inserting it into Drive with OCR enabled,
 * reading the body text of the resulting document, and removing that
 * document again unless the caller asks to keep it.
 *
 * @param {Blob} blob - PDF content; its name and content type are read.
 * @param {Object} [options]
 * @param {string} [options.path] - Drive folder path for the temporary
 *     document, resolved with getDriveFolderFromPath().
 * @param {string} [options.ocrLanguage] - OCR language code; defaults to 'pl'.
 * @param {boolean} [options.keepGdoc] - when truthy, the converted document
 *     is left in Drive instead of being removed.
 * @returns {string} Text of the converted document.
 * @throws Whatever Drive.Files.insert, DocumentApp.openById or
 *     Drive.Files.remove throw; the temporary document is still removed when
 *     reading it fails.
 */
function pdfToText (blob, options) {
  options = options || {};
  var parents = [];
  if (options.path) {
    // NOTE(review): the folder object (or null when the path does not
    // resolve) is passed through as-is - confirm the shape Drive expects.
    parents.push(getDriveFolderFromPath(options.path));
  }
  var resource = {
    // Name the converted document after the PDF, swapping the trailing "pdf".
    title: blob.getName().replace(/pdf$/, 'gdoc'),
    mimeType: blob.getContentType(),
    parents: parents
  };
  var insertOpts = {
    ocr: true,
    ocrLanguage: options.ocrLanguage || 'pl'
  };
  // Save PDF as GDOC
  var gdocFile = Drive.Files.insert(resource, blob, insertOpts);
  try {
    // Get text from GDOC
    return DocumentApp.openById(gdocFile.id).getBody().getText();
  } finally {
    // Clean up even when opening or reading the document throws, so a failed
    // conversion does not leave a stray file in Drive.
    if (!options.keepGdoc) {
      Drive.Files.remove(gdocFile.id);
    }
  }
}
/**
 * Walks a slash-separated path down from the Drive root folder and returns
 * the folder it names.
 *
 * Empty path segments are skipped, so "/a/b", "a/b/" and "a//b" are
 * equivalent; a missing or empty path yields the root folder.
 *
 * @param {string} [path] - folder path such as "/a/b".
 * @returns {Folder|null} The matching folder, or null as soon as one segment
 *     has no child folder of that name.
 */
function getDriveFolderFromPath (path) {
  var pending = (path || "/").split("/").filter(function (segment) {
    return segment !== "";
  });
  var folder = DriveApp.getRootFolder();
  while (folder && pending.length > 0) {
    var candidates = folder.getFoldersByName(pending.shift());
    folder = candidates.hasNext() ? candidates.next() : null;
  }
  // Segments left over mean the walk stopped on an unresolved folder.
  return pending.length > 0 ? null : folder;
}