/*
 * (C) Copyright 2006-2009 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Nuxeo - initial API and implementation
 *
 * $Id$
 */

022package org.nuxeo.ecm.platform.mail.listener.action;
023
024import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.ATTACHMENTS_KEY;
025import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.CC_RECIPIENTS_KEY;
026import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.CONTENT_KEY;
027import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.MIMETYPE_SERVICE_KEY;
028import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.RECIPIENTS_KEY;
029import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SENDER_EMAIL_KEY;
030import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SENDER_KEY;
031import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SENDING_DATE_KEY;
032import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SUBJECT_KEY;
033import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.TEXT_KEY;
034
035import java.io.IOException;
036import java.io.InputStream;
037import java.io.UnsupportedEncodingException;
038import java.util.ArrayList;
039import java.util.Collection;
040import java.util.HashMap;
041import java.util.List;
042import java.util.Map;
043
044import javax.mail.Address;
045import javax.mail.BodyPart;
046import javax.mail.Message;
047import javax.mail.MessagingException;
048import javax.mail.Multipart;
049import javax.mail.Part;
050import javax.mail.internet.AddressException;
051import javax.mail.internet.ContentType;
052import javax.mail.internet.InternetAddress;
053import javax.mail.internet.MimeMessage;
054import javax.mail.internet.MimePart;
055import javax.mail.internet.MimeUtility;
056
057import org.apache.commons.io.IOUtils;
058import org.apache.commons.logging.Log;
059import org.apache.commons.logging.LogFactory;
060import org.nuxeo.ecm.core.api.Blob;
061import org.nuxeo.ecm.core.api.Blobs;
062import org.nuxeo.ecm.platform.mail.action.ExecutionContext;
063import org.nuxeo.ecm.platform.mail.utils.MailCoreConstants;
064import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException;
065import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
066import org.nuxeo.runtime.api.Framework;
067
068/**
069 * Puts on the pipe execution context the values retrieved from the new messages found in the INBOX. These values are
070 * used later when new MailMessage documents are created based on them.
071 *
072 * @author Catalin Baican
073 */
074public class ExtractMessageInformationAction extends AbstractMailAction {
075
076    private static final Log log = LogFactory.getLog(ExtractMessageInformationAction.class);
077
078    public static final String DEFAULT_BINARY_MIMETYPE = "application/octet-stream*";
079
080    public static final String MESSAGE_RFC822_MIMETYPE = "message/rfc822";
081
082    private String bodyContent;
083
084    public static final String COPY_MESSAGE = "org.nuxeo.mail.imap.copy";
085
086    @Override
087    public boolean execute(ExecutionContext context) {
088        bodyContent = "";
089
090        boolean copyMessage = Boolean.parseBoolean(Framework.getProperty(COPY_MESSAGE, "false"));
091
092        try {
093            Message originalMessage = context.getMessage();
094            if (log.isDebugEnabled()) {
095                log.debug("Transforming message, original subject: " + originalMessage.getSubject());
096            }
097
098            // fully load the message before trying to parse to
099            // override most of server bugs, see
100            // http://java.sun.com/products/javamail/FAQ.html#imapserverbug
101            Message message;
102            if (originalMessage instanceof MimeMessage && copyMessage) {
103                message = new MimeMessage((MimeMessage) originalMessage);
104                if (log.isDebugEnabled()) {
105                    log.debug("Transforming message after full load: " + message.getSubject());
106                }
107            } else {
108                // stuck with the original one
109                message = originalMessage;
110            }
111
112            // Subject
113            String subject = message.getSubject();
114            if (subject != null) {
115                subject = subject.trim();
116            }
117            if (subject == null || "".equals(subject)) {
118                subject = "<Unknown>";
119            }
120            context.put(SUBJECT_KEY, subject);
121
122            // Sender
123            try {
124                Address[] from = message.getFrom();
125                String sender = null;
126                String senderEmail = null;
127                if (from != null) {
128                    Address addr = from[0];
129                    if (addr instanceof InternetAddress) {
130                        InternetAddress iAddr = (InternetAddress) addr;
131                        senderEmail = iAddr.getAddress();
132                        sender = iAddr.getPersonal() + " <" + senderEmail + ">";
133                    } else {
134                        sender += addr.toString();
135                        senderEmail = sender;
136                    }
137                }
138                context.put(SENDER_KEY, sender);
139                context.put(SENDER_EMAIL_KEY, senderEmail);
140            } catch (AddressException ae) {
141                // try to parse sender from header instead
142                String[] values = message.getHeader("From");
143                if (values != null) {
144                    context.put(SENDER_KEY, values[0]);
145                }
146            }
147            // Sending date
148            context.put(SENDING_DATE_KEY, message.getSentDate());
149
150            // Recipients
151            try {
152                Address[] to = message.getRecipients(Message.RecipientType.TO);
153                Collection<String> recipients = new ArrayList<>();
154                if (to != null) {
155                    for (Address addr : to) {
156                        if (addr instanceof InternetAddress) {
157                            InternetAddress iAddr = (InternetAddress) addr;
158                            if (iAddr.getPersonal() != null) {
159                                recipients.add(iAddr.getPersonal() + " <" + iAddr.getAddress() + ">");
160                            } else {
161                                recipients.add(iAddr.getAddress());
162                            }
163                        } else {
164                            recipients.add(addr.toString());
165                        }
166                    }
167                }
168                context.put(RECIPIENTS_KEY, recipients);
169            } catch (AddressException ae) {
170                // try to parse recipient from header instead
171                Collection<String> recipients = getHeaderValues(message, Message.RecipientType.TO.toString());
172                context.put(RECIPIENTS_KEY, recipients);
173            }
174
175            // CC recipients
176
177            try {
178                Address[] toCC = message.getRecipients(Message.RecipientType.CC);
179                Collection<String> ccRecipients = new ArrayList<>();
180                if (toCC != null) {
181                    for (Address addr : toCC) {
182                        if (addr instanceof InternetAddress) {
183                            InternetAddress iAddr = (InternetAddress) addr;
184                            ccRecipients.add(iAddr.getPersonal() + " " + iAddr.getAddress());
185                        } else {
186                            ccRecipients.add(addr.toString());
187                        }
188                    }
189                }
190                context.put(CC_RECIPIENTS_KEY, ccRecipients);
191
192            } catch (AddressException ae) {
193                // try to parse ccRecipient from header instead
194                Collection<String> ccRecipients = getHeaderValues(message, Message.RecipientType.CC.toString());
195                context.put(CC_RECIPIENTS_KEY, ccRecipients);
196            }
197            String[] messageIdHeader = message.getHeader("Message-ID");
198            if (messageIdHeader != null) {
199                context.put(MailCoreConstants.MESSAGE_ID_KEY, messageIdHeader[0]);
200            }
201
202            MimetypeRegistry mimeService = (MimetypeRegistry) context.getInitialContext().get(MIMETYPE_SERVICE_KEY);
203
204            List<Blob> blobs = new ArrayList<>();
205            context.put(ATTACHMENTS_KEY, blobs);
206            context.put(CONTENT_KEY, new HashMap<String, String>());
207
208            // String[] cte = message.getHeader("Content-Transfer-Encoding");
209
210            // process content
211            getAttachmentParts(message, subject, mimeService, context);
212
213            context.put(TEXT_KEY, bodyContent);
214
215            return true;
216        } catch (MessagingException | IOException e) {
217            log.error(e, e);
218        }
219        return false;
220    }
221
222    protected static String getFilename(Part p, String defaultFileName) throws MessagingException {
223        String originalFilename = p.getFileName();
224        if (originalFilename == null || originalFilename.trim().length() == 0) {
225            String filename = defaultFileName;
226            // using default filename => add extension for this type
227            if (p.isMimeType("text/plain")) {
228                filename += ".txt";
229            } else if (p.isMimeType("text/html")) {
230                filename += ".html";
231            }
232            return filename;
233        } else {
234            try {
235                return MimeUtility.decodeText(originalFilename.trim());
236            } catch (UnsupportedEncodingException e) {
237                return originalFilename.trim();
238            }
239        }
240    }
241
242    protected void getAttachmentParts(Part part, String defaultFilename, MimetypeRegistry mimeService,
243            ExecutionContext context) throws MessagingException, IOException {
244        String filename = getFilename(part, defaultFilename);
245        List<Blob> blobs = (List<Blob>) context.get(ATTACHMENTS_KEY);
246        Map<String, String> contentKeys = (Map<String, String>) context.get(CONTENT_KEY);
247
248        if (part.isMimeType("multipart/alternative")) {
249            bodyContent += getText(part, defaultFilename, mimeService, context);
250        } else {
251            if (!part.isMimeType("multipart/*")) {
252                String disp = part.getDisposition();
253                // no disposition => mail body, which can be also blob (image for
254                // instance)
255                if (disp == null && // convert only text
256                        part.getContentType().toLowerCase().startsWith("text/")) {
257                    bodyContent += decodeMailBody(part);
258                } else {
259                    Blob blob;
260                    try (InputStream in = part.getInputStream()) {
261                        blob = Blobs.createBlob(in);
262                    }
263                    String mime = DEFAULT_BINARY_MIMETYPE;
264                    try {
265                        if (mimeService != null) {
266                            ContentType contentType = new ContentType(part.getContentType());
267                            mime = mimeService.getMimetypeFromFilenameAndBlobWithDefault(filename, blob,
268                                    contentType.getBaseType());
269                        }
270                    } catch (MessagingException | MimetypeDetectionException e) {
271                        log.error(e);
272                    }
273                    blob.setMimeType(mime);
274
275                    blob.setFilename(filename);
276                    if (part instanceof MimePart) {
277                        String contentId = ((MimePart) part).getContentID();
278                        if (contentId != null) {
279                            contentKeys.put(filename, contentId.replace("<", "").replace(">", ""));
280                        }
281                    }
282                    blobs.add(blob);
283                }
284            }
285
286            if (part.isMimeType("multipart/*")) {
287                // This is a Multipart
288                Multipart mp = (Multipart) part.getContent();
289
290                int count = mp.getCount();
291                for (int i = 0; i < count; i++) {
292                    getAttachmentParts(mp.getBodyPart(i), defaultFilename, mimeService, context);
293                }
294            } else if (part.isMimeType(MESSAGE_RFC822_MIMETYPE)) {
295                // This is a Nested Message
296                getAttachmentParts((Part) part.getContent(), defaultFilename, mimeService, context);
297            }
298        }
299    }
300
301    /**
302     * Return the primary text content of the message.
303     */
304    private String getText(Part p, String defaultFilename, MimetypeRegistry mimeService, ExecutionContext context)
305            throws MessagingException, IOException {
306        if (p.isMimeType("text/*")) {
307            return decodeMailBody(p);
308        }
309
310        if (p.isMimeType("multipart/alternative")) {
311            // prefer html text over plain text
312            Multipart mp = (Multipart) p.getContent();
313            String text = null;
314            for (int i = 0; i < mp.getCount(); i++) {
315                Part bp = mp.getBodyPart(i);
316                if (bp.isMimeType("text/plain")) {
317                    if (text == null) {
318                        text = getText(bp, defaultFilename, mimeService, context);
319                    }
320                    continue;
321                } else if (bp.isMimeType("text/html")) {
322                    String s = getText(bp, defaultFilename, mimeService, context);
323                    if (s != null) {
324                        return s;
325                    }
326                } else {
327                    return getText(bp, defaultFilename, mimeService, context);
328                }
329            }
330            return text;
331        } else if (p.isMimeType("multipart/*")) {
332            Multipart mp = (Multipart) p.getContent();
333            String s = null;
334            for (int i = 0; i < mp.getCount(); i++) {
335                BodyPart bodyPart = mp.getBodyPart(i);
336                if (Part.INLINE.equals(bodyPart.getDisposition())) {
337                    getAttachmentParts(bodyPart, defaultFilename, mimeService, context);
338                } else {
339                    s = getText(bodyPart, defaultFilename, mimeService, context);
340                }
341            }
342            if (s != null) {
343                return s;
344            }
345        }
346
347        return null;
348    }
349
350    /**
351     * Interprets the body accordingly to the charset used. It relies on the content type being
352     * ****;charset={charset};******
353     *
354     * @return the decoded String
355     */
356    protected static String decodeMailBody(Part part) throws MessagingException, IOException {
357
358        String encoding = null;
359
360        // try to get encoding from header rather than from Stream !
361        // unfortunately, this does not seem to be reliable ...
362        /*
363         * String[] cteHeader = part.getHeader("Content-Transfer-Encoding"); if (cteHeader!=null && cteHeader.length>0)
364         * { encoding = cteHeader[0].toLowerCase(); }
365         */
366
367        // fall back to default sniffing
368        // that will actually read the stream from server
369        if (encoding == null) {
370            encoding = MimeUtility.getEncoding(part.getDataHandler());
371        }
372
373        String contType = part.getContentType();
374        final String charsetIdentifier = "charset=";
375        final String ISO88591 = "iso-8859-1";
376        final String WINDOWS1252 = "windows-1252";
377        int offset = contType.indexOf(charsetIdentifier);
378        String charset = "";
379        if (offset >= 0) {
380            charset = contType.substring(offset + charsetIdentifier.length());
381            offset = charset.indexOf(";");
382            if (offset > 0) {
383                charset = charset.substring(0, offset);
384            }
385        }
386        // Charset could be like "utf-8" or utf-8
387        if (!"".equals(charset)) {
388            charset = charset.replaceAll("\"", "");
389        }
390        log.debug("Content type: " + contType + "; charset: " + charset);
391        if (charset.equalsIgnoreCase(ISO88591)) {
392            // see
393            // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#character1
394            // for more details see http://en.wikipedia.org/wiki/ISO_8859-1
395            // section "ISO-8859-1 and Windows-1252 confusion"
396            charset = WINDOWS1252;
397            log.debug("Using replacing charset: " + charset);
398        }
399
400        try (InputStream is = MimeUtility.decode(part.getInputStream(), encoding)) {
401            String ret;
402            byte[] streamContent = IOUtils.toByteArray(is);
403            if ("".equals(charset)) {
404                ret = new String(streamContent);
405            } else {
406                try {
407                    ret = new String(streamContent, charset);
408                } catch (UnsupportedEncodingException e) {
409                    // try without encoding
410                    ret = new String(streamContent);
411                }
412            }
413            return ret;
414        } catch (IOException ex) {
415            log.error("Unable to read content", ex);
416            return "";
417        }
418    }
419
420    public Collection<String> getHeaderValues(Message message, String headerName) throws MessagingException {
421        Collection<String> valuesList = new ArrayList<>();
422        String[] values = message.getHeader(headerName);
423        if (values != null) {
424            for (String value : values) {
425                valuesList.add(value);
426            }
427        }
428        return valuesList;
429    }
430
431}