/*
 * (C) Copyright 2006-2009 Nuxeo SAS (http://nuxeo.com/) and contributors.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Lesser General Public License
 * (LGPL) version 2.1 which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/lgpl.html
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * Contributors:
 *     Nuxeo - initial API and implementation
 *
 * $Id$
 */

package org.nuxeo.ecm.core.convert.plugins.text.extractors;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import javax.mail.Address;
import javax.mail.Message.RecipientType;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.Session;
import javax.mail.internet.ContentType;
import javax.mail.internet.MimeMessage;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
import org.nuxeo.ecm.core.api.blobholder.SimpleBlobHolder;
import org.nuxeo.ecm.core.convert.api.ConversionException;
import org.nuxeo.ecm.core.convert.api.ConversionService;
import org.nuxeo.ecm.core.convert.cache.SimpleCachableBlobHolder;
import org.nuxeo.ecm.core.convert.extension.Converter;
import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;
import org.nuxeo.runtime.api.Framework;

/**
 * Converter that extracts a plain-text rendition of a {@code message/rfc822} blob.
 * <p>
 * The output is the subject, the From/To/Cc addresses and, for every leaf part of the message, its file name, its
 * description and its textual content, each followed by a single space. Text is written as UTF-8 so that the result
 * does not depend on the platform default charset.
 */
public class RFC822ToTextConverter implements Converter {

    private static final Log log = LogFactory.getLog(RFC822ToTextConverter.class);

    private static final String MESSAGE_RFC822_MIMETYPE = "message/rfc822";

    private static final String TXT_MT = "text/plain";

    /** Separator appended after every piece of information written to the output. */
    private static final byte[] SEPARATOR = " ".getBytes(StandardCharsets.UTF_8);

    /** Descriptor handed over by {@link #init(ConverterDescriptor)}; provides the destination mime type. */
    protected ConverterDescriptor descriptor;

    /**
     * Parses the blob as a MIME message and builds a blob holding its extracted text.
     *
     * @param blob the blob holding the raw message, may be {@code null}
     * @return a blob typed with the descriptor's destination mime type, or {@code null} when {@code blob} is
     *         {@code null} or when an {@link IOException} or {@link MessagingException} occurred (the error is
     *         logged)
     */
    protected Blob extractTextFromMessage(Blob blob) {
        if (blob == null) {
            return null;
        }
        File f = null;
        // The source stream stays open for the whole extraction, in case the message reads its content lazily
        // from it, and is closed afterwards in every case.
        try (InputStream messageStream = blob.getStream()) {
            MimeMessage msg = new MimeMessage((Session) null, messageStream);
            f = File.createTempFile("rfc822totext", ".txt");
            // Close the writer before the file is read back.
            try (OutputStream fo = new FileOutputStream(f)) {
                writeMessageText(fo, msg);
            }
            Blob outblob;
            try (InputStream in = new FileInputStream(f)) {
                outblob = Blobs.createBlob(in);
            }
            outblob.setMimeType(descriptor.getDestinationMimeType());
            return outblob;
        } catch (IOException | MessagingException e) {
            log.error(e, e);
        } finally {
            if (f != null) {
                f.delete();
            }
        }
        return null;
    }

    /** Writes the headers of interest and the text of every leaf part of the message to the stream. */
    private static void writeMessageText(OutputStream out, MimeMessage msg) throws MessagingException, IOException {
        List<Part> parts = getAttachmentParts(msg);
        writeInfo(out, msg.getSubject());
        writeInfo(out, msg.getFrom());
        writeInfo(out, msg.getRecipients(RecipientType.TO));
        writeInfo(out, msg.getRecipients(RecipientType.CC));
        for (Part part : parts) {
            writeInfo(out, part.getFileName());
            writeInfo(out, part.getDescription());
            byte[] extracted = extractTextFromMessagePart(part);
            if (extracted != null) {
                writeInfo(out, extracted);
            }
        }
    }

    /**
     * Writes the string form of the address followed by a space; does nothing for {@code null}.
     *
     * @param stream the stream to write to
     * @param address the address to write, may be {@code null}
     */
    protected static void writeInfo(OutputStream stream, Address address) {
        if (address != null) {
            writeInfo(stream, address.toString());
        }
    }

    /**
     * Writes every address of the array, each followed by a space; does nothing for {@code null}.
     *
     * @param stream the stream to write to
     * @param addresses the addresses to write, may be {@code null}
     */
    protected static void writeInfo(OutputStream stream, Address[] addresses) {
        if (addresses != null) {
            for (Address address : addresses) {
                writeInfo(stream, address);
            }
        }
    }

    /**
     * Writes the string, encoded as UTF-8, followed by a space; does nothing for {@code null}.
     *
     * @param stream the stream to write to
     * @param info the text to write, may be {@code null}
     */
    protected static void writeInfo(OutputStream stream, String info) {
        if (info != null) {
            writeInfo(stream, info.getBytes(StandardCharsets.UTF_8));
        }
    }

    /**
     * Writes the bytes followed by a space; does nothing for {@code null}. A write failure is logged and not
     * propagated, so the caller carries on with the remaining information.
     *
     * @param stream the stream to write to
     * @param info the bytes to write, may be {@code null}
     */
    protected static void writeInfo(OutputStream stream, byte[] info) {
        if (info != null) {
            try {
                stream.write(info);
                stream.write(SEPARATOR);
            } catch (IOException e) {
                log.error(e, e);
            }
        }
    }

    /**
     * Extracts the text of a single message part.
     * <p>
     * A {@code text/plain} part is returned as is (UTF-8 encoded) when its content is a {@link String}. Any other
     * part is handed to the converter that the {@link ConversionService} registers from the part's base type to
     * {@code text/plain}, if there is one.
     *
     * @param p the part to extract text from
     * @return the extracted bytes, or {@code null} when the part is plain text whose content is not a string, when
     *         no converter is available, or when the converter produced no blob
     * @throws MessagingException if the part's content type or content cannot be read
     * @throws IOException if reading the part's data fails
     */
    protected static byte[] extractTextFromMessagePart(Part p) throws MessagingException, IOException {
        ContentType contentType = new ContentType(p.getContentType());
        // MIME type names are case-insensitive; match() compares them the same way isMimeType() does in
        // getAttachmentParts.
        if (contentType.match(TXT_MT)) {
            Object content = p.getContent();
            return content instanceof String ? ((String) content).getBytes(StandardCharsets.UTF_8) : null;
        }
        ConversionService cs = Framework.getLocalService(ConversionService.class);

        // Normalize case before the lookup; presumably converters are registered under lower-case mime types.
        String baseType = contentType.getBaseType().toLowerCase(Locale.ROOT);
        String converterName = cs.getConverterName(baseType, TXT_MT);
        if (converterName == null) {
            return null;
        }
        Blob blob;
        try (InputStream in = p.getInputStream()) {
            blob = Blobs.createBlob(in);
        }
        BlobHolder result = cs.convert(converterName, new SimpleBlobHolder(blob), null);
        // A converter may hand back a holder without a blob (convert() below can do so when extraction fails).
        Blob converted = result == null ? null : result.getBlob();
        return converted == null ? null : converted.getByteArray();
    }

    /**
     * Flattens a part into the list of leaf parts whose text should be extracted. Despite the name, body parts are
     * returned as well as attachments.
     * <ul>
     * <li>{@code message/rfc822}: the leaf parts of the embedded message.</li>
     * <li>{@code multipart/alternative}: at most one textual alternative. The scan stops at the first
     * {@code text/plain} sub-part; otherwise the last {@code text/*} sub-part seen is kept. Non-text sub-parts
     * met before the scan stops are flattened recursively.</li>
     * <li>any other {@code multipart/*}: the leaf parts of every sub-part, in order.</li>
     * <li>anything else: the part itself.</li>
     * </ul>
     *
     * @param p the part to flatten
     * @return the leaf parts, never {@code null}
     * @throws MessagingException if the part structure cannot be read
     * @throws IOException if the part content cannot be read
     */
    protected static List<Part> getAttachmentParts(Part p) throws MessagingException, IOException {
        List<Part> res = new ArrayList<>();
        if (p.isMimeType(MESSAGE_RFC822_MIMETYPE)) {
            res.addAll(getAttachmentParts((Part) p.getContent()));
        } else if (p.isMimeType("multipart/alternative")) {
            // only return one of the text alternatives
            Multipart mp = (Multipart) p.getContent();
            int count = mp.getCount();
            Part alternativePart = null;
            for (int i = 0; i < count; i++) {
                Part subPart = mp.getBodyPart(i);
                if (subPart.isMimeType(TXT_MT)) {
                    // plain text wins over any other text alternative
                    alternativePart = subPart;
                    break;
                } else if (subPart.isMimeType("text/*")) {
                    alternativePart = subPart;
                } else {
                    res.addAll(getAttachmentParts(subPart));
                }
            }
            if (alternativePart != null) {
                res.add(alternativePart);
            }
        } else if (p.isMimeType("multipart/*")) {
            Multipart mp = (Multipart) p.getContent();
            int count = mp.getCount();
            for (int i = 0; i < count; i++) {
                res.addAll(getAttachmentParts(mp.getBodyPart(i)));
            }
        } else {
            res.add(p);
        }
        return res;
    }

    /**
     * Converts the blob of the given holder to text.
     *
     * @param blobHolder the holder of the message blob
     * @param parameters conversion parameters, not used by this converter
     * @return a holder wrapping the extracted text blob; the wrapped blob is {@code null} when extraction failed
     */
    @Override
    public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException {
        Blob inputBlob = blobHolder.getBlob();
        Blob outputBlob = extractTextFromMessage(inputBlob);
        return new SimpleCachableBlobHolder(outputBlob);
    }

    /**
     * Stores the descriptor, which is later used to type the output blob.
     *
     * @param descriptor the converter descriptor
     */
    @Override
    public void init(ConverterDescriptor descriptor) {
        this.descriptor = descriptor;
    }

}