This week the "Salesforce Power User" board had a question about how to easily extract attachments so I took 10 minutes and coded up a solution. The solution uses www.capstorm.com's free SQLForce/Jython tool that has been around for years. I use it with clients every day to write one-off data manipulation scripts and for scheduled jobs that scrub and report on SFDC data. Every admin with scripting skills should have this tool in their back pocket.
Enough self-promotion...here's the solution...
-----------------------------------------------------------------------------------------------------
"""
Extract attachments from Salesforce and put them into regular files.
"""
import SQLForce
import os
import base64
##
## Put all attachments into this directory. A subdirectory will be created for each
## unique parent id. Attachments will be named the same as the "name" field in Salesforce.
##
outputDir="C:/tmp/attachments"
##
## SOQL that find the parent record ids where we want the attachments.
## Change this variable to find the owner's of attachments you want to extract.
##
attachmentParentSOQL="""
SELECT id FROM Account WHERE name LIKE 'Z%' LIMIT 5
"""
if not os.path.exists(outputDir):
os.makedirs(outputDir)
def extractAttachments( session, parentIds, outputDir):
extractSOQL = "SELECT id, parentId, body, contentType, isPrivate, name FROM attachment WHERE parentId IN ('" + "','".join(parentIds) + "')"
for rec in session.selectRecords(extractSOQL):
myDir = os.path.join(outputDir, rec.parentId)
if not os.path.exists(myDir):
os.makedirs(myDir)
fh = file(os.path.join(myDir, rec.name), 'wb')
fh.write( base64.b64decode(rec.body))
fh.close()
print rec.name
session = SQLForce.Session("sandbox")
parentIds = []
for rec in session.selectRecords(attachmentParentSOQL ):
parentIds.append( rec.id )
if len(parentIds) > 200 :
extractAttachments( session, parentIds, outputDir )
parentIds = []
if len(parentIds )>0:
extractAttachments( session, parentIds, outputDir )