/*
 * (C) Copyright 2006-2009 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Nuxeo - initial API and implementation
 *
 * $Id$
 */

package org.nuxeo.ecm.platform.mail.listener.action;

import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.ATTACHMENTS_KEY;
import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.CC_RECIPIENTS_KEY;
import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.CONTENT_KEY;
import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.MIMETYPE_SERVICE_KEY;
import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.RECIPIENTS_KEY;
import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SENDER_EMAIL_KEY;
import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SENDER_KEY;
import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SENDING_DATE_KEY;
import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SUBJECT_KEY;
import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.TEXT_KEY;

import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.mail.Address;
import javax.mail.BodyPart;
import javax.mail.Message;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.internet.AddressException;
import javax.mail.internet.ContentType;
import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeMessage;
import javax.mail.internet.MimePart;
import javax.mail.internet.MimeUtility;

import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.platform.mail.action.ExecutionContext;
import org.nuxeo.ecm.platform.mail.utils.MailCoreConstants;
import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
import org.nuxeo.runtime.api.Framework;

/**
 * Puts on the pipe execution context the values retrieved from the new messages found in the INBOX. These values are
 * used later when new MailMessage documents are created based on them.
 *
 * @author Catalin Baican
 */
public class ExtractMessageInformationAction extends AbstractMailAction {

    private static final Log log = LogFactory.getLog(ExtractMessageInformationAction.class);

    // NOTE(review): the trailing '*' looks unintended, but the constant is public so its value is left untouched.
    public static final String DEFAULT_BINARY_MIMETYPE = "application/octet-stream*";

    public static final String MESSAGE_RFC822_MIMETYPE = "message/rfc822";

    /** Name of the framework property that enables copying the message before parsing it. */
    public static final String COPY_MESSAGE = "org.nuxeo.mail.imap.copy";

    private static final String CHARSET_IDENTIFIER = "charset=";

    private static final String ISO88591 = "iso-8859-1";

    private static final String WINDOWS1252 = "windows-1252";

    /**
     * Text body accumulated while walking the parts of the message being processed. Reset at the beginning of every
     * {@link #execute(ExecutionContext)} call and mutated during it.
     */
    private String bodyContent;

    /**
     * Extracts subject, sender, sending date, recipients, message id, text body and attachments from the message of
     * the given context and stores them in that context.
     *
     * @param context the execution context holding the message; it also receives the extracted values
     * @return {@code true} when the extraction completed, {@code false} when a {@link MessagingException} or an
     *         {@link IOException} occurred (the error is logged)
     */
    @Override
    public boolean execute(ExecutionContext context) {
        bodyContent = "";

        boolean copyMessage = Boolean.parseBoolean(Framework.getProperty(COPY_MESSAGE, "false"));

        try {
            Message originalMessage = context.getMessage();
            if (log.isDebugEnabled()) {
                log.debug("Transforming message, original subject: " + originalMessage.getSubject());
            }

            // fully load the message before trying to parse to
            // override most of server bugs, see
            // http://java.sun.com/products/javamail/FAQ.html#imapserverbug
            Message message;
            if (originalMessage instanceof MimeMessage && copyMessage) {
                message = new MimeMessage((MimeMessage) originalMessage);
                if (log.isDebugEnabled()) {
                    log.debug("Transforming message after full load: " + message.getSubject());
                }
            } else {
                // stuck with the original one
                message = originalMessage;
            }

            // Subject
            String subject = message.getSubject();
            if (subject != null) {
                subject = subject.trim();
            }
            if (subject == null || subject.isEmpty()) {
                subject = "<Unknown>";
            }
            context.put(SUBJECT_KEY, subject);

            // Sender
            putSender(message, context);

            // Sending date
            context.put(SENDING_DATE_KEY, message.getSentDate());

            // Recipients ("Personal <address>") and CC recipients ("Personal address")
            putRecipients(message, Message.RecipientType.TO, RECIPIENTS_KEY, true, context);
            putRecipients(message, Message.RecipientType.CC, CC_RECIPIENTS_KEY, false, context);

            String[] messageIdHeader = message.getHeader("Message-ID");
            if (messageIdHeader != null && messageIdHeader.length > 0) {
                context.put(MailCoreConstants.MESSAGE_ID_KEY, messageIdHeader[0]);
            }

            MimetypeRegistry mimeService = (MimetypeRegistry) context.getInitialContext().get(MIMETYPE_SERVICE_KEY);

            // these two containers are filled by getAttachmentParts through the context
            List<Blob> blobs = new ArrayList<>();
            context.put(ATTACHMENTS_KEY, blobs);
            context.put(CONTENT_KEY, new HashMap<String, String>());

            // process content
            getAttachmentParts(message, subject, mimeService, context);

            context.put(TEXT_KEY, bodyContent);

            return true;
        } catch (MessagingException | IOException e) {
            log.error(e, e);
        }
        return false;
    }

    /**
     * Stores the display form and the email of the first "From" address in the context. When the addresses cannot be
     * parsed, the raw "From" header value is stored as the sender instead and no sender email is set.
     */
    private void putSender(Message message, ExecutionContext context) throws MessagingException {
        try {
            Address[] from = message.getFrom();
            String sender = null;
            String senderEmail = null;
            if (from != null && from.length > 0) {
                Address addr = from[0];
                if (addr instanceof InternetAddress) {
                    InternetAddress iAddr = (InternetAddress) addr;
                    senderEmail = iAddr.getAddress();
                    sender = formatAddress(iAddr, true);
                } else {
                    // plain assignment: appending to the still-null sender would yield "null..."
                    sender = addr.toString();
                    senderEmail = sender;
                }
            }
            context.put(SENDER_KEY, sender);
            context.put(SENDER_EMAIL_KEY, senderEmail);
        } catch (AddressException ae) {
            // try to parse sender from header instead
            String[] values = message.getHeader("From");
            if (values != null && values.length > 0) {
                context.put(SENDER_KEY, values[0]);
            }
        }
    }

    /**
     * Stores the recipients of the given type in the context under {@code key}. When the addresses cannot be parsed,
     * the raw values of the matching header are stored instead.
     *
     * @param bracketAddress whether the email is wrapped in angle brackets when a personal name is present
     */
    private void putRecipients(Message message, Message.RecipientType type, String key, boolean bracketAddress,
            ExecutionContext context) throws MessagingException {
        try {
            Address[] addresses = message.getRecipients(type);
            Collection<String> recipients = new ArrayList<>();
            if (addresses != null) {
                for (Address addr : addresses) {
                    if (addr instanceof InternetAddress) {
                        recipients.add(formatAddress((InternetAddress) addr, bracketAddress));
                    } else {
                        recipients.add(addr.toString());
                    }
                }
            }
            context.put(key, recipients);
        } catch (AddressException ae) {
            // try to parse recipients from header instead
            context.put(key, getHeaderValues(message, type.toString()));
        }
    }

    /**
     * Builds the display form of an address: the bare email when there is no personal name, otherwise the personal
     * name followed by the email (optionally wrapped in angle brackets).
     */
    private static String formatAddress(InternetAddress address, boolean bracketAddress) {
        String personal = address.getPersonal();
        String email = address.getAddress();
        if (personal == null) {
            // avoid producing "null <email>" / "null email"
            return email;
        }
        return bracketAddress ? personal + " <" + email + ">" : personal + " " + email;
    }

    /**
     * Returns the file name to use for the given part.
     *
     * @param p the part
     * @param defaultFileName name used when the part has no (or a blank) file name; ".txt" or ".html" is appended to
     *            it for text/plain and text/html parts
     * @return the decoded, trimmed file name of the part, or the name derived from {@code defaultFileName}
     */
    protected static String getFilename(Part p, String defaultFileName) throws MessagingException {
        String originalFilename = p.getFileName();
        if (originalFilename == null || originalFilename.trim().isEmpty()) {
            String filename = defaultFileName;
            // using default filename => add extension for this type
            if (p.isMimeType("text/plain")) {
                filename += ".txt";
            } else if (p.isMimeType("text/html")) {
                filename += ".html";
            }
            return filename;
        }
        String trimmed = originalFilename.trim();
        try {
            return MimeUtility.decodeText(trimmed);
        } catch (UnsupportedEncodingException e) {
            // keep the raw name when it cannot be decoded
            return trimmed;
        }
    }

    /**
     * Walks the given part recursively: text found in the body is appended to the body content, every other leaf part
     * is turned into a blob added to the attachment list of the context, and content ids are recorded in the content
     * map of the context, keyed by file name.
     *
     * @param part the part to process
     * @param defaultFilename file name used for parts that do not carry one
     * @param mimeService service used to detect the mime type of attachments, may be {@code null}
     * @param context the execution context holding the attachment list and the content map
     */
    protected void getAttachmentParts(Part part, String defaultFilename, MimetypeRegistry mimeService,
            ExecutionContext context) throws MessagingException, IOException {
        String filename = getFilename(part, defaultFilename);
        @SuppressWarnings("unchecked") // stored with this type by execute()
        List<Blob> blobs = (List<Blob>) context.get(ATTACHMENTS_KEY);
        @SuppressWarnings("unchecked") // stored with this type by execute()
        Map<String, String> contentKeys = (Map<String, String>) context.get(CONTENT_KEY);

        if (part.isMimeType("multipart/alternative")) {
            String text = getText(part, defaultFilename, mimeService, context);
            if (text != null) {
                // a missing text must not end up as the literal "null" in the body
                bodyContent += text;
            }
            return;
        }

        if (part.isMimeType("multipart/*")) {
            // This is a Multipart
            Multipart mp = (Multipart) part.getContent();
            int count = mp.getCount();
            for (int i = 0; i < count; i++) {
                getAttachmentParts(mp.getBodyPart(i), defaultFilename, mimeService, context);
            }
            return;
        }

        String disp = part.getDisposition();
        // no disposition => mail body, which can be also blob (image for
        // instance)
        if (disp == null && // convert only text
                part.getContentType().toLowerCase().startsWith("text/")) {
            bodyContent += decodeMailBody(part);
        } else {
            Blob blob;
            try (InputStream in = part.getInputStream()) {
                blob = Blobs.createBlob(in);
            }
            String mime = DEFAULT_BINARY_MIMETYPE;
            try {
                if (mimeService != null) {
                    ContentType contentType = new ContentType(part.getContentType());
                    mime = mimeService.getMimetypeFromFilenameAndBlobWithDefault(filename, blob,
                            contentType.getBaseType());
                }
            } catch (MessagingException | MimetypeDetectionException e) {
                // detection is best effort: keep the default mime type
                log.error(e, e);
            }
            blob.setMimeType(mime);

            blob.setFilename(filename);
            if (part instanceof MimePart) {
                String contentId = ((MimePart) part).getContentID();
                if (contentId != null) {
                    contentKeys.put(filename, contentId.replace("<", "").replace(">", ""));
                }
            }
            blobs.add(blob);
        }

        if (part.isMimeType(MESSAGE_RFC822_MIMETYPE)) {
            // This is a Nested Message
            getAttachmentParts((Part) part.getContent(), defaultFilename, mimeService, context);
        }
    }

    /**
     * Return the primary text content of the message.
     * <p>
     * For multipart/alternative the html text is preferred over the plain text. For any other multipart, parts with
     * an inline disposition are handed to {@link #getAttachmentParts} and the text of the last part that yields one
     * is returned.
     *
     * @return the text, or {@code null} when the part holds none
     */
    private String getText(Part p, String defaultFilename, MimetypeRegistry mimeService, ExecutionContext context)
            throws MessagingException, IOException {
        if (p.isMimeType("text/*")) {
            return decodeMailBody(p);
        }

        if (p.isMimeType("multipart/alternative")) {
            // prefer html text over plain text
            Multipart mp = (Multipart) p.getContent();
            String text = null;
            for (int i = 0; i < mp.getCount(); i++) {
                Part bp = mp.getBodyPart(i);
                if (bp.isMimeType("text/plain")) {
                    if (text == null) {
                        text = getText(bp, defaultFilename, mimeService, context);
                    }
                } else if (bp.isMimeType("text/html")) {
                    String s = getText(bp, defaultFilename, mimeService, context);
                    if (s != null) {
                        return s;
                    }
                } else {
                    return getText(bp, defaultFilename, mimeService, context);
                }
            }
            return text;
        }

        if (p.isMimeType("multipart/*")) {
            Multipart mp = (Multipart) p.getContent();
            String text = null;
            for (int i = 0; i < mp.getCount(); i++) {
                BodyPart bodyPart = mp.getBodyPart(i);
                if (Part.INLINE.equals(bodyPart.getDisposition())) {
                    getAttachmentParts(bodyPart, defaultFilename, mimeService, context);
                } else {
                    String candidate = getText(bodyPart, defaultFilename, mimeService, context);
                    if (candidate != null) {
                        // a later part without text must not discard the text already found
                        text = candidate;
                    }
                }
            }
            return text;
        }

        return null;
    }

    /**
     * Interprets the body accordingly to the charset used. It relies on the content type being
     * ****;charset={charset};******
     * <p>
     * A declared iso-8859-1 charset is replaced by windows-1252. When no charset is declared, or the declared one is
     * not supported, the platform default charset is used.
     *
     * @return the decoded String, or an empty String when the content cannot be read
     */
    protected static String decodeMailBody(Part part) throws MessagingException, IOException {
        // Sniff the encoding from the data: this actually reads the stream from the server. Reading it from the
        // Content-Transfer-Encoding header instead did not seem to be reliable.
        String encoding = MimeUtility.getEncoding(part.getDataHandler());

        String contType = part.getContentType();
        String charset = extractCharset(contType);
        log.debug("Content type: " + contType + "; charset: " + charset);
        if (ISO88591.equalsIgnoreCase(charset)) {
            // see
            // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#character1
            // for more details see http://en.wikipedia.org/wiki/ISO_8859-1
            // section "ISO-8859-1 and Windows-1252 confusion"
            charset = WINDOWS1252;
            log.debug("Using replacing charset: " + charset);
        }

        try (InputStream is = MimeUtility.decode(part.getInputStream(), encoding)) {
            byte[] streamContent = IOUtils.toByteArray(is);
            if (charset.isEmpty()) {
                return new String(streamContent);
            }
            try {
                return new String(streamContent, charset);
            } catch (UnsupportedEncodingException e) {
                // try without encoding
                return new String(streamContent);
            }
        } catch (IOException ex) {
            log.error("Unable to read content", ex);
            return "";
        }
    }

    /**
     * Extracts the value of the charset parameter from a content type. The parameter name is matched ignoring case;
     * quotes and surrounding whitespace are removed from the value.
     *
     * @return the charset, or an empty String when none is declared
     */
    private static String extractCharset(String contentType) {
        if (contentType == null) {
            return "";
        }
        int offset = indexOfIgnoreCase(contentType, CHARSET_IDENTIFIER);
        if (offset < 0) {
            return "";
        }
        String charset = contentType.substring(offset + CHARSET_IDENTIFIER.length());
        int end = charset.indexOf(';');
        if (end >= 0) {
            charset = charset.substring(0, end);
        }
        // Charset could be like "utf-8" or utf-8
        return charset.replace("\"", "").trim();
    }

    /** Returns the index of the first case-insensitive occurrence of {@code needle}, or -1 when absent. */
    private static int indexOfIgnoreCase(String haystack, String needle) {
        int last = haystack.length() - needle.length();
        for (int i = 0; i <= last; i++) {
            if (haystack.regionMatches(true, i, needle, 0, needle.length())) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Returns all the values of the given header of the message.
     *
     * @param message the message to read the header from
     * @param headerName the name of the header
     * @return a new, modifiable collection of the header values; empty when the header is absent
     */
    public Collection<String> getHeaderValues(Message message, String headerName) throws MessagingException {
        String[] values = message.getHeader(headerName);
        if (values == null) {
            return new ArrayList<>();
        }
        return new ArrayList<>(Arrays.asList(values));
    }

}