/*
 * (C) Copyright 2006-2009 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Nuxeo - initial API and implementation
 *
 * $Id$
 */

package org.nuxeo.ecm.platform.mail.listener.action;

import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.ATTACHMENTS_KEY;
import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.CC_RECIPIENTS_KEY;
import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.MIMETYPE_SERVICE_KEY;
import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.RECIPIENTS_KEY;
import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SENDER_EMAIL_KEY;
import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SENDER_KEY;
import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SENDING_DATE_KEY;
import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.SUBJECT_KEY;
import static org.nuxeo.ecm.platform.mail.utils.MailCoreConstants.TEXT_KEY;

import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import javax.mail.Address;
import javax.mail.Message;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.internet.AddressException;
import javax.mail.internet.ContentType;
import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeMessage;
import javax.mail.internet.MimeUtility;

import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.platform.mail.action.ExecutionContext;
import org.nuxeo.ecm.platform.mail.utils.MailCoreConstants;
import org.nuxeo.ecm.platform.mimetype.MimetypeDetectionException;
import org.nuxeo.ecm.platform.mimetype.interfaces.MimetypeRegistry;
import org.nuxeo.runtime.api.Framework;

/**
 * Puts on the pipe execution context the values retrieved from the new messages found in the INBOX. These values are
 * used later when new MailMessage documents are created based on them.
 * <p>
 * The values stored are: subject, sender, sender email, sending date, TO and CC recipients, message id, the list of
 * attachment blobs and the text body.
 *
 * @author Catalin Baican
 */
public class ExtractMessageInformationAction extends AbstractMailAction {

    private static final Log log = LogFactory.getLog(ExtractMessageInformationAction.class);

    // NOTE(review): the trailing '*' looks unintentional for a MIME type, but this is a public constant that other
    // code may compare against, so its value is left untouched here - confirm before changing.
    public static final String DEFAULT_BINARY_MIMETYPE = "application/octet-stream*";

    public static final String MESSAGE_RFC822_MIMETYPE = "message/rfc822";

    /**
     * Text body accumulated while walking the parts of the message being processed. Reset at the beginning of every
     * {@link #execute(ExecutionContext)} call.
     * <p>
     * NOTE(review): this is per-instance mutable state, so one instance presumably must not run {@code execute}
     * concurrently - confirm against how actions are instantiated.
     */
    private String bodyContent;

    /** Name of the framework property enabling the full copy of a {@link MimeMessage} before parsing it. */
    public static final String COPY_MESSAGE = "org.nuxeo.mail.imap.copy";

    private static final String CHARSET_IDENTIFIER = "charset=";

    private static final String ISO88591 = "iso-8859-1";

    private static final String WINDOWS1252 = "windows-1252";

    /**
     * Extracts the information of the message held by the context and stores it back on the context.
     *
     * @param context the pipe execution context providing the message and receiving the extracted values
     * @return {@code true} when the message was processed, {@code false} when a {@link MessagingException} or an
     *         {@link IOException} occurred (the error is logged)
     */
    @Override
    public boolean execute(ExecutionContext context) {
        bodyContent = "";

        boolean copyMessage = Boolean.parseBoolean(Framework.getProperty(COPY_MESSAGE, "false"));

        try {
            Message originalMessage = context.getMessage();
            if (log.isDebugEnabled()) {
                log.debug("Transforming message, original subject: " + originalMessage.getSubject());
            }

            // fully load the message before trying to parse to
            // override most of server bugs, see
            // http://java.sun.com/products/javamail/FAQ.html#imapserverbug
            Message message;
            if (originalMessage instanceof MimeMessage && copyMessage) {
                message = new MimeMessage((MimeMessage) originalMessage);
                if (log.isDebugEnabled()) {
                    log.debug("Transforming message after full load: " + message.getSubject());
                }
            } else {
                // stuck with the original one
                message = originalMessage;
            }

            // Subject
            String subject = message.getSubject();
            if (subject != null) {
                subject = subject.trim();
            }
            if (subject == null || subject.isEmpty()) {
                subject = "<Unknown>";
            }
            context.put(SUBJECT_KEY, subject);

            // Sender
            extractSender(message, context);

            // Sending date
            context.put(SENDING_DATE_KEY, message.getSentDate());

            // Recipients
            context.put(RECIPIENTS_KEY, extractRecipients(message, Message.RecipientType.TO, true));

            // CC recipients (historical format: "personal address", without angle brackets)
            context.put(CC_RECIPIENTS_KEY, extractRecipients(message, Message.RecipientType.CC, false));

            String[] messageIdHeader = message.getHeader("Message-ID");
            if (messageIdHeader != null && messageIdHeader.length > 0) {
                context.put(MailCoreConstants.MESSAGE_ID_KEY, messageIdHeader[0]);
            }

            MimetypeRegistry mimeService = (MimetypeRegistry) context.getInitialContext().get(MIMETYPE_SERVICE_KEY);

            // the list is registered first: getAttachmentParts reads it back from the context
            List<Blob> blobs = new ArrayList<>();
            context.put(ATTACHMENTS_KEY, blobs);

            // process content
            getAttachmentParts(message, subject, mimeService, context);

            context.put(TEXT_KEY, bodyContent);

            return true;
        } catch (MessagingException | IOException e) {
            log.error(e, e);
        }
        return false;
    }

    /**
     * Stores the sender display string and the sender email on the context. Both are {@code null} when the message
     * has no sender. When the address cannot be parsed, only the raw value of the first "From" header is stored as
     * sender.
     */
    private void extractSender(Message message, ExecutionContext context) throws MessagingException {
        try {
            Address[] from = message.getFrom();
            String sender = null;
            String senderEmail = null;
            if (from != null && from.length > 0) {
                Address addr = from[0];
                sender = formatAddress(addr, true);
                if (addr instanceof InternetAddress) {
                    senderEmail = ((InternetAddress) addr).getAddress();
                } else {
                    senderEmail = sender;
                }
            }
            context.put(SENDER_KEY, sender);
            context.put(SENDER_EMAIL_KEY, senderEmail);
        } catch (AddressException ae) {
            // try to parse sender from header instead
            String[] values = message.getHeader("From");
            if (values != null && values.length > 0) {
                context.put(SENDER_KEY, values[0]);
            }
        }
    }

    /**
     * Returns the display strings of the recipients of the given type, falling back on the raw header values when the
     * addresses cannot be parsed.
     *
     * @param bracketed whether the email address is wrapped in angle brackets when a personal name is present
     */
    private Collection<String> extractRecipients(Message message, Message.RecipientType type, boolean bracketed)
            throws MessagingException {
        try {
            Address[] addresses = message.getRecipients(type);
            Collection<String> recipients = new ArrayList<>();
            if (addresses != null) {
                for (Address addr : addresses) {
                    recipients.add(formatAddress(addr, bracketed));
                }
            }
            return recipients;
        } catch (AddressException ae) {
            // try to parse the recipients from the header instead
            return getHeaderValues(message, type.toString());
        }
    }

    /**
     * Formats an address for display: the personal name followed by the email address, or the email address alone when
     * there is no personal name (so that no literal "null" ends up in the result). Addresses that are not
     * {@link InternetAddress} instances are rendered with {@code toString()}.
     */
    private static String formatAddress(Address addr, boolean bracketed) {
        if (!(addr instanceof InternetAddress)) {
            return addr.toString();
        }
        InternetAddress iAddr = (InternetAddress) addr;
        String personal = iAddr.getPersonal();
        String email = iAddr.getAddress();
        if (personal == null) {
            return email;
        }
        return bracketed ? personal + " <" + email + ">" : personal + " " + email;
    }

    /**
     * Returns the decoded file name of the part, or the default file name when the part has none. When the default is
     * used, a ".txt" or ".html" extension is appended for text/plain and text/html parts.
     *
     * @param p the part to name
     * @param defaultFileName the name used when the part does not declare a file name
     * @return the file name to use for the part
     * @throws MessagingException if the part cannot be inspected
     */
    protected static String getFilename(Part p, String defaultFileName) throws MessagingException {
        String originalFilename = p.getFileName();
        if (originalFilename == null || originalFilename.trim().isEmpty()) {
            // using default filename => add extension for this type
            if (p.isMimeType("text/plain")) {
                return defaultFileName + ".txt";
            }
            if (p.isMimeType("text/html")) {
                return defaultFileName + ".html";
            }
            return defaultFileName;
        }
        String trimmed = originalFilename.trim();
        try {
            return MimeUtility.decodeText(trimmed);
        } catch (UnsupportedEncodingException e) {
            // keep the raw (still encoded) name rather than failing
            return trimmed;
        }
    }

    /**
     * Walks the given part recursively, appending text content to the body and adding every other leaf part as a blob
     * to the attachment list found on the context under {@code ATTACHMENTS_KEY}.
     *
     * @param part the part to process
     * @param defaultFilename the file name used for parts that do not declare one
     * @param mimeService the registry used to detect the mime type of the blobs, may be {@code null}
     * @param context the execution context holding the attachment list
     * @throws MessagingException if a part cannot be inspected
     * @throws IOException if the content of a part cannot be read
     */
    protected void getAttachmentParts(Part part, String defaultFilename, MimetypeRegistry mimeService,
            ExecutionContext context) throws MessagingException, IOException {
        String filename = getFilename(part, defaultFilename);
        @SuppressWarnings("unchecked")
        List<Blob> blobs = (List<Blob>) context.get(ATTACHMENTS_KEY);

        if (part.isMimeType("multipart/alternative")) {
            // getText returns null when no alternative holds text: do not append the literal "null"
            String text = getText(part);
            if (text != null) {
                bodyContent += text;
            }
            return;
        }

        if (part.isMimeType("multipart/*")) {
            // This is a Multipart
            Multipart mp = (Multipart) part.getContent();
            int count = mp.getCount();
            for (int i = 0; i < count; i++) {
                getAttachmentParts(mp.getBodyPart(i), defaultFilename, mimeService, context);
            }
            return;
        }

        String disp = part.getDisposition();
        // no disposition => mail body, which can be also blob (image for instance): convert only text
        if (disp == null && part.getContentType().toLowerCase().startsWith("text/")) {
            bodyContent += decodeMailBody(part);
        } else {
            Blob blob;
            try (InputStream in = part.getInputStream()) {
                blob = Blobs.createBlob(in);
            }
            String mime = DEFAULT_BINARY_MIMETYPE;
            try {
                if (mimeService != null) {
                    ContentType contentType = new ContentType(part.getContentType());
                    mime = mimeService.getMimetypeFromFilenameAndBlobWithDefault(filename, blob,
                            contentType.getBaseType());
                }
            } catch (MessagingException | MimetypeDetectionException e) {
                // detection is best effort: keep the default mime type
                log.error(e, e);
            }
            blob.setMimeType(mime);
            blob.setFilename(filename);
            blobs.add(blob);
        }

        if (part.isMimeType(MESSAGE_RFC822_MIMETYPE)) {
            // This is a Nested Message
            getAttachmentParts((Part) part.getContent(), defaultFilename, mimeService, context);
        }
    }

    /**
     * Return the primary text content of the message.
     *
     * @return the text, or {@code null} when the part holds no text content
     */
    private String getText(Part p) throws MessagingException, IOException {
        if (p.isMimeType("text/*")) {
            return decodeMailBody(p);
        }

        if (p.isMimeType("multipart/alternative")) {
            // prefer html text over plain text
            Multipart mp = (Multipart) p.getContent();
            String text = null;
            for (int i = 0; i < mp.getCount(); i++) {
                Part bp = mp.getBodyPart(i);
                if (bp.isMimeType("text/plain")) {
                    if (text == null) {
                        text = getText(bp);
                    }
                } else {
                    // html or nested alternative: use it when it yields text, otherwise keep looking so that an
                    // already found plain text is not discarded
                    String s = getText(bp);
                    if (s != null) {
                        return s;
                    }
                }
            }
            return text;
        } else if (p.isMimeType("multipart/*")) {
            Multipart mp = (Multipart) p.getContent();
            for (int i = 0; i < mp.getCount(); i++) {
                String s = getText(mp.getBodyPart(i));
                if (s != null) {
                    return s;
                }
            }
        }

        return null;
    }

    /**
     * Interprets the body accordingly to the charset used. The charset is read from the "charset" parameter of the
     * content type of the part (parameter name matched case-insensitively, surrounding quotes removed). When no
     * charset is declared, or when the declared one is not supported, the platform default charset is used.
     * "iso-8859-1" is decoded as "windows-1252".
     *
     * @param part the part holding the text
     * @return the decoded String, or an empty String when the content stream cannot be opened
     * @throws MessagingException if the part cannot be inspected
     * @throws IOException if reading the content fails
     */
    protected static String decodeMailBody(Part part) throws MessagingException, IOException {
        // The transfer encoding is sniffed from the content rather than read from the
        // Content-Transfer-Encoding header, which did not prove reliable.
        // NOTE(review): Part.getInputStream() is documented as already decoding the transfer encoding, so this
        // sniff-and-decode step may be redundant - kept as is to preserve the existing behavior, confirm.
        String encoding = MimeUtility.getEncoding(part.getDataHandler());

        InputStream rawStream;
        try {
            rawStream = part.getInputStream();
        } catch (IOException ex) {
            log.error("Unable to read content", ex);
            return "";
        }

        byte[] streamContent;
        // both streams are closed, even when decoding or reading fails
        try (InputStream raw = rawStream; InputStream is = MimeUtility.decode(raw, encoding)) {
            streamContent = IOUtils.toByteArray(is);
        }

        String contType = part.getContentType();
        String charset = extractCharset(contType);
        log.debug("Content type: " + contType + "; charset: " + charset);
        if (ISO88591.equalsIgnoreCase(charset)) {
            // see
            // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#character1
            // for more details see http://en.wikipedia.org/wiki/ISO_8859-1
            // section "ISO-8859-1 and Windows-1252 confusion"
            charset = WINDOWS1252;
            log.debug("Using replacing charset: " + charset);
        }

        if (charset.isEmpty()) {
            return new String(streamContent);
        }
        try {
            return new String(streamContent, charset);
        } catch (UnsupportedEncodingException e) {
            // try without encoding
            return new String(streamContent);
        }
    }

    /**
     * Extracts the value of the "charset" parameter from a content type.
     *
     * @return the charset name without quotes, or an empty String when there is none
     */
    private static String extractCharset(String contType) {
        if (contType == null) {
            return "";
        }
        try {
            String charset = new ContentType(contType).getParameter("charset");
            return charset == null ? "" : charset.trim();
        } catch (MessagingException e) {
            // malformed content type: fall back on a lenient manual scan below
        }
        int idLength = CHARSET_IDENTIFIER.length();
        for (int i = 0; i + idLength <= contType.length(); i++) {
            if (contType.regionMatches(true, i, CHARSET_IDENTIFIER, 0, idLength)) {
                String charset = contType.substring(i + idLength);
                int end = charset.indexOf(';');
                if (end >= 0) {
                    charset = charset.substring(0, end);
                }
                // Charset could be like "utf-8" or utf-8
                return charset.replace("\"", "").trim();
            }
        }
        return "";
    }

    /**
     * Returns all the values of the given header of the message.
     *
     * @param message the message to read
     * @param headerName the name of the header
     * @return the header values, empty when the message does not have this header
     * @throws MessagingException if the headers cannot be read
     */
    public Collection<String> getHeaderValues(Message message, String headerName) throws MessagingException {
        Collection<String> valuesList = new ArrayList<>();
        String[] values = message.getHeader(headerName);
        if (values != null) {
            for (String value : values) {
                valuesList.add(value);
            }
        }
        return valuesList;
    }

}