001/*
002 * (C) Copyright 2006-2009 Nuxeo SAS (http://nuxeo.com/) and contributors.
003 *
004 * All rights reserved. This program and the accompanying materials
005 * are made available under the terms of the GNU Lesser General Public License
006 * (LGPL) version 2.1 which accompanies this distribution, and is available at
007 * http://www.gnu.org/licenses/lgpl.html
008 *
009 * This library is distributed in the hope that it will be useful,
010 * but WITHOUT ANY WARRANTY; without even the implied warranty of
011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
012 * Lesser General Public License for more details.
013 *
014 * Contributors:
015 *     Nuxeo - initial API and implementation
016 *
017 * $Id$
018 */
019
020package org.nuxeo.ecm.platform.mail.listener.action;
021
022import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.ATTACHMENTS_KEY;
023import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.CC_RECIPIENTS_KEY;
024import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.MIMETYPE_SERVICE_KEY;
025import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.RECIPIENTS_KEY;
026import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SENDER_EMAIL_KEY;
027import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SENDER_KEY;
028import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SENDING_DATE_KEY;
029import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SUBJECT_KEY;
030import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.TEXT_KEY;
031
032import java.io.IOException;
033import java.io.InputStream;
034import java.io.UnsupportedEncodingException;
035import java.util.ArrayList;
036import java.util.Collection;
037import java.util.List;
038
039import javax.mail.Address;
040import javax.mail.Message;
041import javax.mail.MessagingException;
042import javax.mail.Multipart;
043import javax.mail.Part;
044import javax.mail.internet.AddressException;
045import javax.mail.internet.ContentType;
046import javax.mail.internet.InternetAddress;
047import javax.mail.internet.MimeMessage;
048import javax.mail.internet.MimeUtility;
049
050import org.apache.commons.logging.Log;
051import org.apache.commons.logging.LogFactory;
052import org.nuxeo.common.utils.FileUtils;
053import org.nuxeo.ecm.core.api.Blob;
054import org.nuxeo.ecm.core.api.Blobs;
055import org.nuxeo.ecm.platform.mail.action.ExecutionContext;
056import org.nuxeo.ecm.platform.mail.utils.MailCoreConstants;
057import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException;
058import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
059import org.nuxeo.runtime.api.Framework;
060
061/**
062 * Puts on the pipe execution context the values retrieved from the new messages found in the INBOX. These values are
063 * used later when new MailMessage documents are created based on them.
064 *
065 * @author Catalin Baican
066 */
067public class ExtractMessageInformationAction extends AbstractMailAction {
068
069    private static final Log log = LogFactory.getLog(ExtractMessageInformationAction.class);
070
071    public static final String DEFAULT_BINARY_MIMETYPE = "application/octet-stream*";
072
073    public static final String MESSAGE_RFC822_MIMETYPE = "message/rfc822";
074
075    private String bodyContent;
076
077    public static final String COPY_MESSAGE = "org.nuxeo.mail.imap.copy";
078
079    @Override
080    public boolean execute(ExecutionContext context) {
081        bodyContent = "";
082
083        boolean copyMessage = Boolean.parseBoolean(Framework.getProperty(COPY_MESSAGE, "false"));
084
085        try {
086            Message originalMessage = context.getMessage();
087            if (log.isDebugEnabled()) {
088                log.debug("Transforming message, original subject: " + originalMessage.getSubject());
089            }
090
091            // fully load the message before trying to parse to
092            // override most of server bugs, see
093            // http://java.sun.com/products/javamail/FAQ.html#imapserverbug
094            Message message;
095            if (originalMessage instanceof MimeMessage && copyMessage) {
096                message = new MimeMessage((MimeMessage) originalMessage);
097                if (log.isDebugEnabled()) {
098                    log.debug("Transforming message after full load: " + message.getSubject());
099                }
100            } else {
101                // stuck with the original one
102                message = originalMessage;
103            }
104
105            // Subject
106            String subject = message.getSubject();
107            if (subject != null) {
108                subject = subject.trim();
109            }
110            if (subject == null || "".equals(subject)) {
111                subject = "<Unknown>";
112            }
113            context.put(SUBJECT_KEY, subject);
114
115            // Sender
116            try {
117                Address[] from = message.getFrom();
118                String sender = null;
119                String senderEmail = null;
120                if (from != null) {
121                    Address addr = from[0];
122                    if (addr instanceof InternetAddress) {
123                        InternetAddress iAddr = (InternetAddress) addr;
124                        senderEmail = iAddr.getAddress();
125                        sender = iAddr.getPersonal() + " <" + senderEmail + ">";
126                    } else {
127                        sender += addr.toString();
128                        senderEmail = sender;
129                    }
130                }
131                context.put(SENDER_KEY, sender);
132                context.put(SENDER_EMAIL_KEY, senderEmail);
133            } catch (AddressException ae) {
134                // try to parse sender from header instead
135                String[] values = message.getHeader("From");
136                if (values != null) {
137                    context.put(SENDER_KEY, values[0]);
138                }
139            }
140            // Sending date
141            context.put(SENDING_DATE_KEY, message.getSentDate());
142
143            // Recipients
144            try {
145                Address[] to = message.getRecipients(Message.RecipientType.TO);
146                Collection<String> recipients = new ArrayList<String>();
147                if (to != null) {
148                    for (Address addr : to) {
149                        if (addr instanceof InternetAddress) {
150                            InternetAddress iAddr = (InternetAddress) addr;
151                            if (iAddr.getPersonal() != null) {
152                                recipients.add(iAddr.getPersonal() + " <" + iAddr.getAddress() + ">");
153                            } else {
154                                recipients.add(iAddr.getAddress());
155                            }
156                        } else {
157                            recipients.add(addr.toString());
158                        }
159                    }
160                }
161                context.put(RECIPIENTS_KEY, recipients);
162            } catch (AddressException ae) {
163                // try to parse recipient from header instead
164                Collection<String> recipients = getHeaderValues(message, Message.RecipientType.TO.toString());
165                context.put(RECIPIENTS_KEY, recipients);
166            }
167
168            // CC recipients
169
170            try {
171                Address[] toCC = message.getRecipients(Message.RecipientType.CC);
172                Collection<String> ccRecipients = new ArrayList<String>();
173                if (toCC != null) {
174                    for (Address addr : toCC) {
175                        if (addr instanceof InternetAddress) {
176                            InternetAddress iAddr = (InternetAddress) addr;
177                            ccRecipients.add(iAddr.getPersonal() + " " + iAddr.getAddress());
178                        } else {
179                            ccRecipients.add(addr.toString());
180                        }
181                    }
182                }
183                context.put(CC_RECIPIENTS_KEY, ccRecipients);
184
185            } catch (AddressException ae) {
186                // try to parse ccRecipient from header instead
187                Collection<String> ccRecipients = getHeaderValues(message, Message.RecipientType.CC.toString());
188                context.put(CC_RECIPIENTS_KEY, ccRecipients);
189            }
190            String[] messageIdHeader = message.getHeader("Message-ID");
191            if (messageIdHeader != null) {
192                context.put(MailCoreConstants.MESSAGE_ID_KEY, messageIdHeader[0]);
193            }
194
195            MimetypeRegistry mimeService = (MimetypeRegistry) context.getInitialContext().get(MIMETYPE_SERVICE_KEY);
196
197            List<Blob> blobs = new ArrayList<Blob>();
198            context.put(ATTACHMENTS_KEY, blobs);
199
200            // String[] cte = message.getHeader("Content-Transfer-Encoding");
201
202            // process content
203            getAttachmentParts(message, subject, mimeService, context);
204
205            context.put(TEXT_KEY, bodyContent);
206
207            return true;
208        } catch (MessagingException | IOException e) {
209            log.error(e, e);
210        }
211        return false;
212    }
213
214    protected static String getFilename(Part p, String defaultFileName) throws MessagingException {
215        String originalFilename = p.getFileName();
216        if (originalFilename == null || originalFilename.trim().length() == 0) {
217            String filename = defaultFileName;
218            // using default filename => add extension for this type
219            if (p.isMimeType("text/plain")) {
220                filename += ".txt";
221            } else if (p.isMimeType("text/html")) {
222                filename += ".html";
223            }
224            return filename;
225        } else {
226            try {
227                return MimeUtility.decodeText(originalFilename.trim());
228            } catch (UnsupportedEncodingException e) {
229                return originalFilename.trim();
230            }
231        }
232    }
233
234    protected void getAttachmentParts(Part part, String defaultFilename, MimetypeRegistry mimeService,
235            ExecutionContext context) throws MessagingException, IOException {
236        String filename = getFilename(part, defaultFilename);
237        List<Blob> blobs = (List<Blob>) context.get(ATTACHMENTS_KEY);
238
239        if (part.isMimeType("multipart/alternative")) {
240            bodyContent += getText(part);
241        } else {
242            if (!part.isMimeType("multipart/*")) {
243                String disp = part.getDisposition();
244                // no disposition => mail body, which can be also blob (image for
245                // instance)
246                if (disp == null && // convert only text
247                        part.getContentType().toLowerCase().startsWith("text/")) {
248                    bodyContent += decodeMailBody(part);
249                } else {
250                    Blob blob;
251                    try (InputStream in = part.getInputStream()) {
252                        blob = Blobs.createBlob(in);
253                    }
254                    String mime = DEFAULT_BINARY_MIMETYPE;
255                    try {
256                        if (mimeService != null) {
257                            ContentType contentType = new ContentType(part.getContentType());
258                            mime = mimeService.getMimetypeFromFilenameAndBlobWithDefault(filename, blob,
259                                    contentType.getBaseType());
260                        }
261                    } catch (MessagingException | MimetypeDetectionException e) {
262                        log.error(e);
263                    }
264                    blob.setMimeType(mime);
265
266                    blob.setFilename(filename);
267
268                    blobs.add(blob);
269                }
270            }
271
272            if (part.isMimeType("multipart/*")) {
273                // This is a Multipart
274                Multipart mp = (Multipart) part.getContent();
275
276                int count = mp.getCount();
277                for (int i = 0; i < count; i++) {
278                    getAttachmentParts(mp.getBodyPart(i), defaultFilename, mimeService, context);
279                }
280            } else if (part.isMimeType(MESSAGE_RFC822_MIMETYPE)) {
281                // This is a Nested Message
282                getAttachmentParts((Part) part.getContent(), defaultFilename, mimeService, context);
283            }
284        }
285
286    }
287
288    /**
289     * Return the primary text content of the message.
290     */
291    private String getText(Part p) throws MessagingException, IOException {
292        if (p.isMimeType("text/*")) {
293            return decodeMailBody(p);
294        }
295
296        if (p.isMimeType("multipart/alternative")) {
297            // prefer html text over plain text
298            Multipart mp = (Multipart) p.getContent();
299            String text = null;
300            for (int i = 0; i < mp.getCount(); i++) {
301                Part bp = mp.getBodyPart(i);
302                if (bp.isMimeType("text/plain")) {
303                    if (text == null) {
304                        text = getText(bp);
305                    }
306                    continue;
307                } else if (bp.isMimeType("text/html")) {
308                    String s = getText(bp);
309                    if (s != null) {
310                        return s;
311                    }
312                } else {
313                    return getText(bp);
314                }
315            }
316            return text;
317        } else if (p.isMimeType("multipart/*")) {
318            Multipart mp = (Multipart) p.getContent();
319            for (int i = 0; i < mp.getCount(); i++) {
320                String s = getText(mp.getBodyPart(i));
321                if (s != null) {
322                    return s;
323                }
324            }
325        }
326
327        return null;
328    }
329
330    /**
331     * Interprets the body accordingly to the charset used. It relies on the content type being
332     * ****;charset={charset};******
333     *
334     * @return the decoded String
335     */
336    protected static String decodeMailBody(Part part) throws MessagingException, IOException {
337
338        String encoding = null;
339
340        // try to get encoding from header rather than from Stream !
341        // unfortunately, this does not seem to be reliable ...
342        /*
343         * String[] cteHeader = part.getHeader("Content-Transfer-Encoding"); if (cteHeader!=null && cteHeader.length>0)
344         * { encoding = cteHeader[0].toLowerCase(); }
345         */
346
347        // fall back to default sniffing
348        // that will actually read the stream from server
349        if (encoding == null) {
350            encoding = MimeUtility.getEncoding(part.getDataHandler());
351        }
352
353        InputStream is = null;
354        try {
355            is = MimeUtility.decode(part.getInputStream(), encoding);
356        } catch (IOException ex) {
357            log.error("Unable to read content", ex);
358            return "";
359        }
360
361        String contType = part.getContentType();
362        final String charsetIdentifier = "charset=";
363        final String ISO88591 = "iso-8859-1";
364        final String WINDOWS1252 = "windows-1252";
365        int offset = contType.indexOf(charsetIdentifier);
366        String charset = "";
367        if (offset >= 0) {
368            charset = contType.substring(offset + charsetIdentifier.length());
369            offset = charset.indexOf(";");
370            if (offset > 0) {
371                charset = charset.substring(0, offset);
372            }
373        }
374        // Charset could be like "utf-8" or utf-8
375        if (!"".equals(charset)) {
376            charset = charset.replaceAll("\"", "");
377        }
378        log.debug("Content type: " + contType + "; charset: " + charset);
379        if (charset.equalsIgnoreCase(ISO88591)) {
380            // see
381            // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#character1
382            // for more details see http://en.wikipedia.org/wiki/ISO_8859-1
383            // section "ISO-8859-1 and Windows-1252 confusion"
384            charset = WINDOWS1252;
385            log.debug("Using replacing charset: " + charset);
386        }
387        String ret;
388        byte[] streamContent = FileUtils.readBytes(is);
389        if ("".equals(charset)) {
390            ret = new String(streamContent);
391        } else {
392            try {
393                ret = new String(streamContent, charset);
394            } catch (UnsupportedEncodingException e) {
395                // try without encoding
396                ret = new String(streamContent);
397            }
398        }
399        return ret;
400    }
401
402    public Collection<String> getHeaderValues(Message message, String headerName) throws MessagingException {
403        Collection<String> valuesList = new ArrayList<String>();
404        String[] values = message.getHeader(headerName);
405        if (values != null) {
406            for (String value : values) {
407                valuesList.add(value);
408            }
409        }
410        return valuesList;
411    }
412
413}