001/* 002 * (C) Copyright 2006-2009 Nuxeo SAS (http://nuxeo.com/) and contributors. 003 * 004 * All rights reserved. This program and the accompanying materials 005 * are made available under the terms of the GNU Lesser General Public License 006 * (LGPL) version 2.1 which accompanies this distribution, and is available at 007 * http://www.gnu.org/licenses/lgpl.html 008 * 009 * This library is distributed in the hope that it will be useful, 010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 012 * Lesser General Public License for more details. 013 * 014 * Contributors: 015 * Nuxeo - initial API and implementation 016 * 017 * $Id$ 018 */ 019 020package org.nuxeo.ecm.platform.mail.listener.action; 021 022import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.ATTACHMENTS_KEY; 023import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.CC_RECIPIENTS_KEY; 024import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.MIMETYPE_SERVICE_KEY; 025import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.RECIPIENTS_KEY; 026import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SENDER_EMAIL_KEY; 027import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SENDER_KEY; 028import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SENDING_DATE_KEY; 029import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SUBJECT_KEY; 030import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.TEXT_KEY; 031 032import java.io.IOException; 033import java.io.InputStream; 034import java.io.UnsupportedEncodingException; 035import java.util.ArrayList; 036import java.util.Collection; 037import java.util.List; 038 039import javax.mail.Address; 040import javax.mail.Message; 041import javax.mail.MessagingException; 042import javax.mail.Multipart; 043import javax.mail.Part; 044import javax.mail.internet.AddressException; 045import javax.mail.internet.ContentType; 046import javax.mail.internet.InternetAddress; 047import javax.mail.internet.MimeMessage; 048import javax.mail.internet.MimeUtility; 049 050import org.apache.commons.logging.Log; 051import org.apache.commons.logging.LogFactory; 052import org.nuxeo.common.utils.FileUtils; 053import org.nuxeo.ecm.core.api.Blob; 054import org.nuxeo.ecm.core.api.Blobs; 055import org.nuxeo.ecm.platform.mail.action.ExecutionContext; 056import org.nuxeo.ecm.platform.mail.utils.MailCoreConstants; 057import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException; 058import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry; 059import org.nuxeo.runtime.api.Framework; 060 061/** 062 * Puts on the pipe execution context the values retrieved from the new messages found in the INBOX. These values are 063 * used later when new MailMessage documents are created based on them. 064 * 065 * @author Catalin Baican 066 */ 067public class ExtractMessageInformationAction extends AbstractMailAction { 068 069 private static final Log log = LogFactory.getLog(ExtractMessageInformationAction.class); 070 071 public static final String DEFAULT_BINARY_MIMETYPE = "application/octet-stream*"; 072 073 public static final String MESSAGE_RFC822_MIMETYPE = "message/rfc822"; 074 075 private String bodyContent; 076 077 public static final String COPY_MESSAGE = "org.nuxeo.mail.imap.copy"; 078 079 @Override 080 public boolean execute(ExecutionContext context) { 081 bodyContent = ""; 082 083 boolean copyMessage = Boolean.parseBoolean(Framework.getProperty(COPY_MESSAGE, "false")); 084 085 try { 086 Message originalMessage = context.getMessage(); 087 if (log.isDebugEnabled()) { 088 log.debug("Transforming message, original subject: " + originalMessage.getSubject()); 089 } 090 091 // fully load the message before trying to parse to 092 // override most of server bugs, see 093 // http://java.sun.com/products/javamail/FAQ.html#imapserverbug 094 Message message; 095 if (originalMessage instanceof MimeMessage && copyMessage) { 096 message = new MimeMessage((MimeMessage) originalMessage); 097 if (log.isDebugEnabled()) { 098 log.debug("Transforming message after full load: " + message.getSubject()); 099 } 100 } else { 101 // stuck with the original one 102 message = originalMessage; 103 } 104 105 // Subject 106 String subject = message.getSubject(); 107 if (subject != null) { 108 subject = subject.trim(); 109 } 110 if (subject == null || "".equals(subject)) { 111 subject = "<Unknown>"; 112 } 113 context.put(SUBJECT_KEY, subject); 114 115 // Sender 116 try { 117 Address[] from = message.getFrom(); 118 String sender = null; 119 String senderEmail = null; 120 if (from != null) { 121 Address addr = from[0]; 122 if (addr instanceof InternetAddress) { 123 InternetAddress iAddr = (InternetAddress) addr; 124 senderEmail = iAddr.getAddress(); 125 sender = iAddr.getPersonal() + " <" + senderEmail + ">"; 126 } else { 127 sender += addr.toString(); 128 senderEmail = sender; 129 } 130 } 131 context.put(SENDER_KEY, sender); 132 context.put(SENDER_EMAIL_KEY, senderEmail); 133 } catch (AddressException ae) { 134 // try to parse sender from header instead 135 String[] values = message.getHeader("From"); 136 if (values != null) { 137 context.put(SENDER_KEY, values[0]); 138 } 139 } 140 // Sending date 141 context.put(SENDING_DATE_KEY, message.getSentDate()); 142 143 // Recipients 144 try { 145 Address[] to = message.getRecipients(Message.RecipientType.TO); 146 Collection<String> recipients = new ArrayList<String>(); 147 if (to != null) { 148 for (Address addr : to) { 149 if (addr instanceof InternetAddress) { 150 InternetAddress iAddr = (InternetAddress) addr; 151 if (iAddr.getPersonal() != null) { 152 recipients.add(iAddr.getPersonal() + " <" + iAddr.getAddress() + ">"); 153 } else { 154 recipients.add(iAddr.getAddress()); 155 } 156 } else { 157 recipients.add(addr.toString()); 158 } 159 } 160 } 161 context.put(RECIPIENTS_KEY, recipients); 162 } catch (AddressException ae) { 163 // try to parse recipient from header instead 164 Collection<String> recipients = getHeaderValues(message, Message.RecipientType.TO.toString()); 165 context.put(RECIPIENTS_KEY, recipients); 166 } 167 168 // CC recipients 169 170 try { 171 Address[] toCC = message.getRecipients(Message.RecipientType.CC); 172 Collection<String> ccRecipients = new ArrayList<String>(); 173 if (toCC != null) { 174 for (Address addr : toCC) { 175 if (addr instanceof InternetAddress) { 176 InternetAddress iAddr = (InternetAddress) addr; 177 ccRecipients.add(iAddr.getPersonal() + " " + iAddr.getAddress()); 178 } else { 179 ccRecipients.add(addr.toString()); 180 } 181 } 182 } 183 context.put(CC_RECIPIENTS_KEY, ccRecipients); 184 185 } catch (AddressException ae) { 186 // try to parse ccRecipient from header instead 187 Collection<String> ccRecipients = getHeaderValues(message, Message.RecipientType.CC.toString()); 188 context.put(CC_RECIPIENTS_KEY, ccRecipients); 189 } 190 String[] messageIdHeader = message.getHeader("Message-ID"); 191 if (messageIdHeader != null) { 192 context.put(MailCoreConstants.MESSAGE_ID_KEY, messageIdHeader[0]); 193 } 194 195 MimetypeRegistry mimeService = (MimetypeRegistry) context.getInitialContext().get(MIMETYPE_SERVICE_KEY); 196 197 List<Blob> blobs = new ArrayList<Blob>(); 198 context.put(ATTACHMENTS_KEY, blobs); 199 200 // String[] cte = message.getHeader("Content-Transfer-Encoding"); 201 202 // process content 203 getAttachmentParts(message, subject, mimeService, context); 204 205 context.put(TEXT_KEY, bodyContent); 206 207 return true; 208 } catch (MessagingException | IOException e) { 209 log.error(e, e); 210 } 211 return false; 212 } 213 214 protected static String getFilename(Part p, String defaultFileName) throws MessagingException { 215 String originalFilename = p.getFileName(); 216 if (originalFilename == null || originalFilename.trim().length() == 0) { 217 String filename = defaultFileName; 218 // using default filename => add extension for this type 219 if (p.isMimeType("text/plain")) { 220 filename += ".txt"; 221 } else if (p.isMimeType("text/html")) { 222 filename += ".html"; 223 } 224 return filename; 225 } else { 226 try { 227 return MimeUtility.decodeText(originalFilename.trim()); 228 } catch (UnsupportedEncodingException e) { 229 return originalFilename.trim(); 230 } 231 } 232 } 233 234 protected void getAttachmentParts(Part part, String defaultFilename, MimetypeRegistry mimeService, 235 ExecutionContext context) throws MessagingException, IOException { 236 String filename = getFilename(part, defaultFilename); 237 List<Blob> blobs = (List<Blob>) context.get(ATTACHMENTS_KEY); 238 239 if (part.isMimeType("multipart/alternative")) { 240 bodyContent += getText(part); 241 } else { 242 if (!part.isMimeType("multipart/*")) { 243 String disp = part.getDisposition(); 244 // no disposition => mail body, which can be also blob (image for 245 // instance) 246 if (disp == null && // convert only text 247 part.getContentType().toLowerCase().startsWith("text/")) { 248 bodyContent += decodeMailBody(part); 249 } else { 250 Blob blob; 251 try (InputStream in = part.getInputStream()) { 252 blob = Blobs.createBlob(in); 253 } 254 String mime = DEFAULT_BINARY_MIMETYPE; 255 try { 256 if (mimeService != null) { 257 ContentType contentType = new ContentType(part.getContentType()); 258 mime = mimeService.getMimetypeFromFilenameAndBlobWithDefault(filename, blob, 259 contentType.getBaseType()); 260 } 261 } catch (MessagingException | MimetypeDetectionException e) { 262 log.error(e); 263 } 264 blob.setMimeType(mime); 265 266 blob.setFilename(filename); 267 268 blobs.add(blob); 269 } 270 } 271 272 if (part.isMimeType("multipart/*")) { 273 // This is a Multipart 274 Multipart mp = (Multipart) part.getContent(); 275 276 int count = mp.getCount(); 277 for (int i = 0; i < count; i++) { 278 getAttachmentParts(mp.getBodyPart(i), defaultFilename, mimeService, context); 279 } 280 } else if (part.isMimeType(MESSAGE_RFC822_MIMETYPE)) { 281 // This is a Nested Message 282 getAttachmentParts((Part) part.getContent(), defaultFilename, mimeService, context); 283 } 284 } 285 286 } 287 288 /** 289 * Return the primary text content of the message. 290 */ 291 private String getText(Part p) throws MessagingException, IOException { 292 if (p.isMimeType("text/*")) { 293 return decodeMailBody(p); 294 } 295 296 if (p.isMimeType("multipart/alternative")) { 297 // prefer html text over plain text 298 Multipart mp = (Multipart) p.getContent(); 299 String text = null; 300 for (int i = 0; i < mp.getCount(); i++) { 301 Part bp = mp.getBodyPart(i); 302 if (bp.isMimeType("text/plain")) { 303 if (text == null) { 304 text = getText(bp); 305 } 306 continue; 307 } else if (bp.isMimeType("text/html")) { 308 String s = getText(bp); 309 if (s != null) { 310 return s; 311 } 312 } else { 313 return getText(bp); 314 } 315 } 316 return text; 317 } else if (p.isMimeType("multipart/*")) { 318 Multipart mp = (Multipart) p.getContent(); 319 for (int i = 0; i < mp.getCount(); i++) { 320 String s = getText(mp.getBodyPart(i)); 321 if (s != null) { 322 return s; 323 } 324 } 325 } 326 327 return null; 328 } 329 330 /** 331 * Interprets the body accordingly to the charset used. It relies on the content type being 332 * ****;charset={charset};****** 333 * 334 * @return the decoded String 335 */ 336 protected static String decodeMailBody(Part part) throws MessagingException, IOException { 337 338 String encoding = null; 339 340 // try to get encoding from header rather than from Stream ! 341 // unfortunately, this does not seem to be reliable ... 342 /* 343 * String[] cteHeader = part.getHeader("Content-Transfer-Encoding"); if (cteHeader!=null && cteHeader.length>0) 344 * { encoding = cteHeader[0].toLowerCase(); } 345 */ 346 347 // fall back to default sniffing 348 // that will actually read the stream from server 349 if (encoding == null) { 350 encoding = MimeUtility.getEncoding(part.getDataHandler()); 351 } 352 353 InputStream is = null; 354 try { 355 is = MimeUtility.decode(part.getInputStream(), encoding); 356 } catch (IOException ex) { 357 log.error("Unable to read content", ex); 358 return ""; 359 } 360 361 String contType = part.getContentType(); 362 final String charsetIdentifier = "charset="; 363 final String ISO88591 = "iso-8859-1"; 364 final String WINDOWS1252 = "windows-1252"; 365 int offset = contType.indexOf(charsetIdentifier); 366 String charset = ""; 367 if (offset >= 0) { 368 charset = contType.substring(offset + charsetIdentifier.length()); 369 offset = charset.indexOf(";"); 370 if (offset > 0) { 371 charset = charset.substring(0, offset); 372 } 373 } 374 // Charset could be like "utf-8" or utf-8 375 if (!"".equals(charset)) { 376 charset = charset.replaceAll("\"", ""); 377 } 378 log.debug("Content type: " + contType + "; charset: " + charset); 379 if (charset.equalsIgnoreCase(ISO88591)) { 380 // see 381 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#character1 382 // for more details see http://en.wikipedia.org/wiki/ISO_8859-1 383 // section "ISO-8859-1 and Windows-1252 confusion" 384 charset = WINDOWS1252; 385 log.debug("Using replacing charset: " + charset); 386 } 387 String ret; 388 byte[] streamContent = FileUtils.readBytes(is); 389 if ("".equals(charset)) { 390 ret = new String(streamContent); 391 } else { 392 try { 393 ret = new String(streamContent, charset); 394 } catch (UnsupportedEncodingException e) { 395 // try without encoding 396 ret = new String(streamContent); 397 } 398 } 399 return ret; 400 } 401 402 public Collection<String> getHeaderValues(Message message, String headerName) throws MessagingException { 403 Collection<String> valuesList = new ArrayList<String>(); 404 String[] values = message.getHeader(headerName); 405 if (values != null) { 406 for (String value : values) { 407 valuesList.add(value); 408 } 409 } 410 return valuesList; 411 } 412 413}