001/*
002 * (C) Copyright 2006-2009 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Nuxeo - initial API and implementation
018 *
019 * $Id$
020 */
021
022package org.nuxeo.ecm.platform.mail.listener.action;
023
024import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.ATTACHMENTS_KEY;
025import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.CC_RECIPIENTS_KEY;
026import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.MIMETYPE_SERVICE_KEY;
027import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.RECIPIENTS_KEY;
028import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SENDER_EMAIL_KEY;
029import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SENDER_KEY;
030import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SENDING_DATE_KEY;
031import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SUBJECT_KEY;
032import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.TEXT_KEY;
033
034import java.io.IOException;
035import java.io.InputStream;
036import java.io.UnsupportedEncodingException;
037import java.util.ArrayList;
038import java.util.Collection;
039import java.util.List;
040
041import javax.mail.Address;
042import javax.mail.Message;
043import javax.mail.MessagingException;
044import javax.mail.Multipart;
045import javax.mail.Part;
046import javax.mail.internet.AddressException;
047import javax.mail.internet.ContentType;
048import javax.mail.internet.InternetAddress;
049import javax.mail.internet.MimeMessage;
050import javax.mail.internet.MimeUtility;
051
052import org.apache.commons.io.IOUtils;
053import org.apache.commons.logging.Log;
054import org.apache.commons.logging.LogFactory;
055import org.nuxeo.ecm.core.api.Blob;
056import org.nuxeo.ecm.core.api.Blobs;
057import org.nuxeo.ecm.platform.mail.action.ExecutionContext;
058import org.nuxeo.ecm.platform.mail.utils.MailCoreConstants;
059import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException;
060import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
061import org.nuxeo.runtime.api.Framework;
062
063/**
064 * Puts on the pipe execution context the values retrieved from the new messages found in the INBOX. These values are
065 * used later when new MailMessage documents are created based on them.
066 *
067 * @author Catalin Baican
068 */
069public class ExtractMessageInformationAction extends AbstractMailAction {
070
071    private static final Log log = LogFactory.getLog(ExtractMessageInformationAction.class);
072
073    public static final String DEFAULT_BINARY_MIMETYPE = "application/octet-stream*";
074
075    public static final String MESSAGE_RFC822_MIMETYPE = "message/rfc822";
076
077    private String bodyContent;
078
079    public static final String COPY_MESSAGE = "org.nuxeo.mail.imap.copy";
080
081    @Override
082    public boolean execute(ExecutionContext context) {
083        bodyContent = "";
084
085        boolean copyMessage = Boolean.parseBoolean(Framework.getProperty(COPY_MESSAGE, "false"));
086
087        try {
088            Message originalMessage = context.getMessage();
089            if (log.isDebugEnabled()) {
090                log.debug("Transforming message, original subject: " + originalMessage.getSubject());
091            }
092
093            // fully load the message before trying to parse to
094            // override most of server bugs, see
095            // http://java.sun.com/products/javamail/FAQ.html#imapserverbug
096            Message message;
097            if (originalMessage instanceof MimeMessage && copyMessage) {
098                message = new MimeMessage((MimeMessage) originalMessage);
099                if (log.isDebugEnabled()) {
100                    log.debug("Transforming message after full load: " + message.getSubject());
101                }
102            } else {
103                // stuck with the original one
104                message = originalMessage;
105            }
106
107            // Subject
108            String subject = message.getSubject();
109            if (subject != null) {
110                subject = subject.trim();
111            }
112            if (subject == null || "".equals(subject)) {
113                subject = "<Unknown>";
114            }
115            context.put(SUBJECT_KEY, subject);
116
117            // Sender
118            try {
119                Address[] from = message.getFrom();
120                String sender = null;
121                String senderEmail = null;
122                if (from != null) {
123                    Address addr = from[0];
124                    if (addr instanceof InternetAddress) {
125                        InternetAddress iAddr = (InternetAddress) addr;
126                        senderEmail = iAddr.getAddress();
127                        sender = iAddr.getPersonal() + " <" + senderEmail + ">";
128                    } else {
129                        sender += addr.toString();
130                        senderEmail = sender;
131                    }
132                }
133                context.put(SENDER_KEY, sender);
134                context.put(SENDER_EMAIL_KEY, senderEmail);
135            } catch (AddressException ae) {
136                // try to parse sender from header instead
137                String[] values = message.getHeader("From");
138                if (values != null) {
139                    context.put(SENDER_KEY, values[0]);
140                }
141            }
142            // Sending date
143            context.put(SENDING_DATE_KEY, message.getSentDate());
144
145            // Recipients
146            try {
147                Address[] to = message.getRecipients(Message.RecipientType.TO);
148                Collection<String> recipients = new ArrayList<String>();
149                if (to != null) {
150                    for (Address addr : to) {
151                        if (addr instanceof InternetAddress) {
152                            InternetAddress iAddr = (InternetAddress) addr;
153                            if (iAddr.getPersonal() != null) {
154                                recipients.add(iAddr.getPersonal() + " <" + iAddr.getAddress() + ">");
155                            } else {
156                                recipients.add(iAddr.getAddress());
157                            }
158                        } else {
159                            recipients.add(addr.toString());
160                        }
161                    }
162                }
163                context.put(RECIPIENTS_KEY, recipients);
164            } catch (AddressException ae) {
165                // try to parse recipient from header instead
166                Collection<String> recipients = getHeaderValues(message, Message.RecipientType.TO.toString());
167                context.put(RECIPIENTS_KEY, recipients);
168            }
169
170            // CC recipients
171
172            try {
173                Address[] toCC = message.getRecipients(Message.RecipientType.CC);
174                Collection<String> ccRecipients = new ArrayList<String>();
175                if (toCC != null) {
176                    for (Address addr : toCC) {
177                        if (addr instanceof InternetAddress) {
178                            InternetAddress iAddr = (InternetAddress) addr;
179                            ccRecipients.add(iAddr.getPersonal() + " " + iAddr.getAddress());
180                        } else {
181                            ccRecipients.add(addr.toString());
182                        }
183                    }
184                }
185                context.put(CC_RECIPIENTS_KEY, ccRecipients);
186
187            } catch (AddressException ae) {
188                // try to parse ccRecipient from header instead
189                Collection<String> ccRecipients = getHeaderValues(message, Message.RecipientType.CC.toString());
190                context.put(CC_RECIPIENTS_KEY, ccRecipients);
191            }
192            String[] messageIdHeader = message.getHeader("Message-ID");
193            if (messageIdHeader != null) {
194                context.put(MailCoreConstants.MESSAGE_ID_KEY, messageIdHeader[0]);
195            }
196
197            MimetypeRegistry mimeService = (MimetypeRegistry) context.getInitialContext().get(MIMETYPE_SERVICE_KEY);
198
199            List<Blob> blobs = new ArrayList<Blob>();
200            context.put(ATTACHMENTS_KEY, blobs);
201
202            // String[] cte = message.getHeader("Content-Transfer-Encoding");
203
204            // process content
205            getAttachmentParts(message, subject, mimeService, context);
206
207            context.put(TEXT_KEY, bodyContent);
208
209            return true;
210        } catch (MessagingException | IOException e) {
211            log.error(e, e);
212        }
213        return false;
214    }
215
216    protected static String getFilename(Part p, String defaultFileName) throws MessagingException {
217        String originalFilename = p.getFileName();
218        if (originalFilename == null || originalFilename.trim().length() == 0) {
219            String filename = defaultFileName;
220            // using default filename => add extension for this type
221            if (p.isMimeType("text/plain")) {
222                filename += ".txt";
223            } else if (p.isMimeType("text/html")) {
224                filename += ".html";
225            }
226            return filename;
227        } else {
228            try {
229                return MimeUtility.decodeText(originalFilename.trim());
230            } catch (UnsupportedEncodingException e) {
231                return originalFilename.trim();
232            }
233        }
234    }
235
236    protected void getAttachmentParts(Part part, String defaultFilename, MimetypeRegistry mimeService,
237            ExecutionContext context) throws MessagingException, IOException {
238        String filename = getFilename(part, defaultFilename);
239        List<Blob> blobs = (List<Blob>) context.get(ATTACHMENTS_KEY);
240
241        if (part.isMimeType("multipart/alternative")) {
242            bodyContent += getText(part);
243        } else {
244            if (!part.isMimeType("multipart/*")) {
245                String disp = part.getDisposition();
246                // no disposition => mail body, which can be also blob (image for
247                // instance)
248                if (disp == null && // convert only text
249                        part.getContentType().toLowerCase().startsWith("text/")) {
250                    bodyContent += decodeMailBody(part);
251                } else {
252                    Blob blob;
253                    try (InputStream in = part.getInputStream()) {
254                        blob = Blobs.createBlob(in);
255                    }
256                    String mime = DEFAULT_BINARY_MIMETYPE;
257                    try {
258                        if (mimeService != null) {
259                            ContentType contentType = new ContentType(part.getContentType());
260                            mime = mimeService.getMimetypeFromFilenameAndBlobWithDefault(filename, blob,
261                                    contentType.getBaseType());
262                        }
263                    } catch (MessagingException | MimetypeDetectionException e) {
264                        log.error(e);
265                    }
266                    blob.setMimeType(mime);
267
268                    blob.setFilename(filename);
269
270                    blobs.add(blob);
271                }
272            }
273
274            if (part.isMimeType("multipart/*")) {
275                // This is a Multipart
276                Multipart mp = (Multipart) part.getContent();
277
278                int count = mp.getCount();
279                for (int i = 0; i < count; i++) {
280                    getAttachmentParts(mp.getBodyPart(i), defaultFilename, mimeService, context);
281                }
282            } else if (part.isMimeType(MESSAGE_RFC822_MIMETYPE)) {
283                // This is a Nested Message
284                getAttachmentParts((Part) part.getContent(), defaultFilename, mimeService, context);
285            }
286        }
287
288    }
289
290    /**
291     * Return the primary text content of the message.
292     */
293    private String getText(Part p) throws MessagingException, IOException {
294        if (p.isMimeType("text/*")) {
295            return decodeMailBody(p);
296        }
297
298        if (p.isMimeType("multipart/alternative")) {
299            // prefer html text over plain text
300            Multipart mp = (Multipart) p.getContent();
301            String text = null;
302            for (int i = 0; i < mp.getCount(); i++) {
303                Part bp = mp.getBodyPart(i);
304                if (bp.isMimeType("text/plain")) {
305                    if (text == null) {
306                        text = getText(bp);
307                    }
308                    continue;
309                } else if (bp.isMimeType("text/html")) {
310                    String s = getText(bp);
311                    if (s != null) {
312                        return s;
313                    }
314                } else {
315                    return getText(bp);
316                }
317            }
318            return text;
319        } else if (p.isMimeType("multipart/*")) {
320            Multipart mp = (Multipart) p.getContent();
321            for (int i = 0; i < mp.getCount(); i++) {
322                String s = getText(mp.getBodyPart(i));
323                if (s != null) {
324                    return s;
325                }
326            }
327        }
328
329        return null;
330    }
331
332    /**
333     * Interprets the body accordingly to the charset used. It relies on the content type being
334     * ****;charset={charset};******
335     *
336     * @return the decoded String
337     */
338    protected static String decodeMailBody(Part part) throws MessagingException, IOException {
339
340        String encoding = null;
341
342        // try to get encoding from header rather than from Stream !
343        // unfortunately, this does not seem to be reliable ...
344        /*
345         * String[] cteHeader = part.getHeader("Content-Transfer-Encoding"); if (cteHeader!=null && cteHeader.length>0)
346         * { encoding = cteHeader[0].toLowerCase(); }
347         */
348
349        // fall back to default sniffing
350        // that will actually read the stream from server
351        if (encoding == null) {
352            encoding = MimeUtility.getEncoding(part.getDataHandler());
353        }
354
355        InputStream is = null;
356        try {
357            is = MimeUtility.decode(part.getInputStream(), encoding);
358        } catch (IOException ex) {
359            log.error("Unable to read content", ex);
360            return "";
361        }
362
363        String contType = part.getContentType();
364        final String charsetIdentifier = "charset=";
365        final String ISO88591 = "iso-8859-1";
366        final String WINDOWS1252 = "windows-1252";
367        int offset = contType.indexOf(charsetIdentifier);
368        String charset = "";
369        if (offset >= 0) {
370            charset = contType.substring(offset + charsetIdentifier.length());
371            offset = charset.indexOf(";");
372            if (offset > 0) {
373                charset = charset.substring(0, offset);
374            }
375        }
376        // Charset could be like "utf-8" or utf-8
377        if (!"".equals(charset)) {
378            charset = charset.replaceAll("\"", "");
379        }
380        log.debug("Content type: " + contType + "; charset: " + charset);
381        if (charset.equalsIgnoreCase(ISO88591)) {
382            // see
383            // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#character1
384            // for more details see http://en.wikipedia.org/wiki/ISO_8859-1
385            // section "ISO-8859-1 and Windows-1252 confusion"
386            charset = WINDOWS1252;
387            log.debug("Using replacing charset: " + charset);
388        }
389        String ret;
390        byte[] streamContent = IOUtils.toByteArray(is);
391        if ("".equals(charset)) {
392            ret = new String(streamContent);
393        } else {
394            try {
395                ret = new String(streamContent, charset);
396            } catch (UnsupportedEncodingException e) {
397                // try without encoding
398                ret = new String(streamContent);
399            }
400        }
401        return ret;
402    }
403
404    public Collection<String> getHeaderValues(Message message, String headerName) throws MessagingException {
405        Collection<String> valuesList = new ArrayList<String>();
406        String[] values = message.getHeader(headerName);
407        if (values != null) {
408            for (String value : values) {
409                valuesList.add(value);
410            }
411        }
412        return valuesList;
413    }
414
415}